Merge branch 'develop' into sparse_vector

revert-4814-Add_sequence_project_op
Luo Tao 8 years ago
commit c2feab7ff1

@@ -22,7 +22,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \
apt-get install -y \
git python-pip python-dev openssh-server bison \
git python-pip python-dev openssh-server bison libnccl-dev \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \

@@ -189,7 +189,7 @@ OpDesc {
inputs = {0} // the index of x in vars of BlockDesc above
outputs = {5, 3} // indices of act and hidden_out in vars of BlockDesc above
attrs {
"memories" : {1} // the index of h
"states" : {1} // the index of h
"step_net" : <above step net>
}
};

@@ -19,15 +19,15 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info)
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc glog)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info operator)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
py_proto_compile(framework_py_proto SRCS framework.proto)

@@ -21,6 +21,7 @@
#include "paddle/framework/block_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/dynamic_recurrent_op.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h"
@@ -220,8 +221,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// process recurrent gradient op as a special operator.
if (forwardOp.Type() == "recurrent") {
// NOTE clean up cycle call somewhere (RNN's stepnet contains itself),
// or
// this will result in infinite loop.
// or this will result in infinite loop.
const auto& rnnop =
*static_cast<const operators::RecurrentOp*>(&forwardOp);
auto rnn_grad_op =
@@ -231,6 +231,18 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// create stepnet's gradient op
rnn_grad_op->set_stepnet(
BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
} else if (forwardOp.Type() == "dynamic_recurrent") {
// NOTE clean up cycle call somewhere (RNN's stepnet contains itself),
// or this will result in infinite loop.
const auto& rnnop =
*static_cast<const operators::DynamicRecurrentOp*>(&forwardOp);
auto rnn_grad_op =
static_cast<operators::DynamicRecurrentGradientOp*>(grad_op.get());
const auto& stepnet_op =
*static_cast<const OperatorBase*>(&rnnop.rnn.GetStepUnit());
// create stepnet's gradient op
rnn_grad_op->rnn.SetStepUnit(
BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
}
if (net->ops_.empty()) { // Currently no aux op is added to the network

@@ -41,6 +41,19 @@ bool BlockDescBind::HasVar(const std::string &name) const {
return vars_.find(name) != vars_.end();
}
VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const {
auto it = vars_.find(name);
if (it == vars_.end()) {
return Parent() == kNoneBlockIndex ? nullptr
: ParentBlock()->FindVarRecursive(name);
}
return it->second.get();
}
bool BlockDescBind::HasVarRecursive(const std::string &name) const {
return FindVarRecursive(name) != nullptr;
}
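The lookup added here climbs the parent chain until the name is found or the root block is passed. For reference, a self-contained sketch of the same pattern (the Block type and its members below are illustrative stand-ins, not Paddle's API):

#include <map>
#include <string>

// Illustrative stand-in: a block defers to its parent when a name is not
// local, mirroring FindVarRecursive/HasVarRecursive above.
struct Block {
  const Block* parent = nullptr;  // nullptr plays the role of kNoneBlockIndex
  std::map<std::string, int> vars;  // name -> some descriptor

  const int* FindVarRecursive(const std::string& name) const {
    auto it = vars.find(name);
    if (it != vars.end()) return &it->second;
    return parent ? parent->FindVarRecursive(name) : nullptr;
  }
  bool HasVarRecursive(const std::string& name) const {
    return FindVarRecursive(name) != nullptr;
  }
};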
std::vector<VarDescBind *> BlockDescBind::AllVars() const {
std::vector<VarDescBind *> res;
for (const auto &p : vars_) {
@@ -97,7 +110,7 @@ void BlockDescBind::Flush() {
}
BlockDescBind *BlockDescBind::ParentBlock() const {
if (this->desc_->parent_idx() == -1) {
if (this->desc_->parent_idx() == kNoneBlockIndex) {
return nullptr;
}
return prog_->Block(static_cast<size_t>(this->desc_->parent_idx()));

@@ -21,6 +21,7 @@ limitations under the License. */
#include <vector>
#include "paddle/framework/op_desc.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/framework/var_desc.h"
#include "paddle/platform/macros.h"
@@ -56,6 +57,10 @@ class BlockDescBind {
bool HasVar(const std::string &var_name) const;
VarDescBind *FindVarRecursive(const std::string &name_bytes) const;
bool HasVarRecursive(const std::string &var_name) const;
std::set<std::string> LocalVarNames() const {
std::set<std::string> var_names;
for (auto &var : vars_) {

@@ -68,6 +68,7 @@ message OpProto {
optional bool duplicable = 3 [ default = false ];
optional bool intermediate = 4 [ default = false ];
optional bool dispensable = 5 [ default = false ];
}
// AttrProto describes the C++ type Attribute.

@@ -25,31 +25,50 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
for (size_t i = level_begin; i < level_end; i++) {
new_lod.emplace_back(in.at(i));
}
// transform the lowest level to absolute offset.
LoD abs_offset_lod = ToAbsOffset(in);
new_lod.back() = abs_offset_lod[level_end - 1];
return new_lod;
}
LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
size_t elem_end) {
// slice the lod.
LoD new_lod;
new_lod.reserve(in.size() - level);
auto start = in.at(level)[elem_begin];
auto end = in.at(level)[elem_end];
for (auto it = in.begin() + level; it != in.end(); it++) {
auto it_begin = std::find(it->begin(), it->end(), start);
auto it_end = std::find(it_begin, it->end(), end);
PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
new_lod.emplace_back(it_begin, it_end + 1);
// reset offset if tensor is copied and sliced.
std::transform(new_lod.back().begin(), new_lod.back().end(),
new_lod.back().begin(),
[start](int v) { return v - start; });
PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LoD");
PADDLE_ENFORCE_LT(level, in.size());
PADDLE_ENFORCE_LT(elem_end, in[level].size());
LoD res;
res.resize(in.size() - level);
// copy the sliced portion of the requested level as the first level
res[0].assign(in[level].begin() + elem_begin,
in[level].begin() + elem_end + 1);
for (size_t lvl = 1; lvl < res.size(); lvl++) {
const auto& in_level = in[level + lvl];
const auto& above_level = res[lvl - 1];
auto& out_level = res[lvl];
out_level.assign(in_level.begin() + above_level.front(),
in_level.begin() + above_level.back() + 1);
}
PADDLE_ENFORCE_LE(new_lod.size(), in.size());
return new_lod;
for (size_t lvl = 0; lvl < res.size(); lvl++) {
// to make the first offset equal 0, subtract the first element from all
// elements in this level
size_t front = res[lvl].front();
for (auto& ele : res[lvl]) {
ele -= front;
}
}
return res;
}
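To trace the new slicing logic, here is a self-contained sketch with a worked example; plain std::vector stands in for Paddle's Vector, and asserts replace PADDLE_ENFORCE:

#include <cassert>
#include <cstddef>
#include <vector>

using SimpleLoD = std::vector<std::vector<size_t>>;

SimpleLoD SliceInLevelSketch(const SimpleLoD& in, size_t level,
                             size_t elem_begin, size_t elem_end) {
  assert(level < in.size());
  assert(elem_end < in[level].size());
  SimpleLoD res(in.size() - level);
  // keep offsets [elem_begin, elem_end] of the requested level
  res[0].assign(in[level].begin() + elem_begin,
                in[level].begin() + elem_end + 1);
  for (size_t lvl = 1; lvl < res.size(); lvl++) {
    const auto& above = res[lvl - 1];
    res[lvl].assign(in[level + lvl].begin() + above.front(),
                    in[level + lvl].begin() + above.back() + 1);
  }
  for (auto& lv : res) {  // renormalize so every level starts at 0
    size_t front = lv.front();
    for (auto& e : lv) e -= front;
  }
  return res;
}

int main() {
  SimpleLoD in = {{0, 2, 3}, {0, 2, 4, 7}, {0, 2, 5, 7, 10, 12, 15, 20}};
  // slicing element 1 of level 1 picks offsets {2, 4}, then {5, 7, 10} one
  // level down; renormalization shifts both to start at 0.
  assert((SliceInLevelSketch(in, 1, 1, 2) == SimpleLoD{{0, 2}, {0, 2, 5}}));
}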
LoD ToAbsOffset(const LoD& in) {
// the lowest level stores relative offsets
if (in.empty() || in.size() == 1) return in;
LoD result = in;
for (int level = result.size() - 2; level >= 0; level--) {
for (auto& ele : result[level]) {
ele = result[level + 1][ele];
}
}
return result;
}
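And the companion pass: a worked example of the bottom-up transform in ToAbsOffset under the same stand-in types, showing how each upper-level entry is replaced by the value it indexes in the level below:

#include <cassert>
#include <cstddef>
#include <vector>

using SimpleLoD = std::vector<std::vector<size_t>>;

// Same algorithm as ToAbsOffset above, restated over plain std::vector.
SimpleLoD ToAbsOffsetSketch(SimpleLoD in) {
  if (in.size() <= 1) return in;
  for (int level = static_cast<int>(in.size()) - 2; level >= 0; level--) {
    for (auto& ele : in[level]) ele = in[level + 1][ele];
  }
  return in;
}

int main() {
  SimpleLoD rel = {{0, 2, 3}, {0, 2, 4, 7}, {0, 2, 5, 7, 10, 12, 15, 20}};
  SimpleLoD absolute = ToAbsOffsetSketch(rel);
  assert((absolute[1] == std::vector<size_t>{0, 5, 10, 20}));  // via level 2
  assert((absolute[0] == std::vector<size_t>{0, 10, 20}));  // via updated level 1
}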
bool operator==(const LoD& a, const LoD& b) {
@@ -75,17 +94,7 @@ bool operator==(const LoD& a, const LoD& b) {
size_t LoDTensor::NumElements(size_t level, size_t idx) const {
PADDLE_ENFORCE_LT(level, NumLevels());
PADDLE_ENFORCE_LT(idx, NumElements(level));
// the last level of LoD, just return number of records in Tensor
if (level == NumLevels() - 1) {
return lod_[level][idx + 1] - lod_[level][idx];
}
// high level of LoD, and there is another lower level, return number of
// lower-level elements
auto tmp = SliceInLevel(lod_, level, idx, idx + 1);
PADDLE_ENFORCE_GE(tmp.size(), 2);
// there is a 0 as a placeholder stored in LoD, so the number of elements
// equals lod.size() - 1
return tmp[1].size() - 1;
}
void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) {

@@ -39,23 +39,36 @@ using Vector = thrust::host_vector<
#endif
/*
* 3-level LoD stores
*
* 0 10 20
* 0 5 10 15 20
* 0 2 5 7 10 12 15 20
* LoD is short for Level of Details.
*
* - in a level, each element indicates offset in the underlying Tensor
* - in a level, each element indicates relative offset of the lower level
* - the first element should be 0, indicating that this sequence starts
* from 0
* - each sequence's begin and end (non-inclusive) are level[id] and
* level[id+1]
*
* For example:
* 3-level LoD stores
*
* 0 2 3
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
using LoD = std::vector<Vector<size_t>>;
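To make the relative-offset convention concrete: sequence id at a given level spans [level[id], level[id+1]), so its length in lower-level units is the difference. A hypothetical helper, not part of this header:

// Hypothetical helper: number of lower-level elements in sequence `id`.
inline size_t SeqLength(const LoD& lod, size_t level, size_t id) {
  return lod[level][id + 1] - lod[level][id];
}
// For the 3-level example above, SeqLength(lod, 0, 0) == 2 and
// SeqLength(lod, 1, 2) == 3: level-1 sequence 2 spans offsets [4, 7).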
/*
* Slice levels from a LoD.
* NOTE the lowest level should always store the absolute offsets into the
* underlying tensor instances. So if higher levels are sliced without the
* lowest level, the lowest level of the sliced LoD is transformed to
* absolute offsets.
*/
LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end);
LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
size_t elem_end);
/*
* Transform an LoD from relative offsets to absolute offsets.
*/
LoD ToAbsOffset(const LoD& in);
bool operator==(const LoD& a, const LoD& b);

@@ -30,8 +30,8 @@ class LoDTensorTester : public ::testing::Test {
// 0 5 10 15 20
// 0 2 5 7 10 12 15 20
LoD lod;
lod.push_back(std::vector<size_t>{0, 10, 20});
lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});
lod.push_back(std::vector<size_t>{0, 2, 3});
lod.push_back(std::vector<size_t>{0, 2, 5, 8});
lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
ASSERT_EQ(lod.size(), 3UL);
@@ -52,14 +52,14 @@ TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor_.NumLevels(), 3UL); }
TEST_F(LoDTensorTester, NumElements) {
ASSERT_EQ(lod_tensor_.NumElements(0), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(1), 4UL);
ASSERT_EQ(lod_tensor_.NumElements(1), 3UL);
ASSERT_EQ(lod_tensor_.NumElements(2), 8UL);
}
TEST_F(LoDTensorTester, NumElements2) {
ASSERT_EQ(lod_tensor_.NumElements(0, 0), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(0, 1), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(1, 1), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(0, 1), 1UL);
ASSERT_EQ(lod_tensor_.NumElements(1, 1), 3UL);
}
TEST_F(LoDTensorTester, ShrinkLevels) {
@@ -68,17 +68,16 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
LoDTensor new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkLevels(level, level + 1);
ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
}
// shrink 2 level
for (size_t level = 0; level < 2UL; ++level) {
LoDTensor new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkLevels(level, level + 2);
// the lowest level's last element should be the tensor's batch_size.
ASSERT_EQ(new_lod_tensor.lod().back().back(),
lod_tensor_.lod().back().back());
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
ASSERT_EQ(new_lod_tensor.NumElements(1),
lod_tensor_.NumElements(level + 1));
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
}
}
@@ -86,19 +85,19 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
TEST_F(LoDTensorTester, ShrinkInLevel) {
size_t level = 0;
LoDTensor new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkInLevel(level, 0, 2);
new_lod_tensor.ShrinkInLevel(level, 0, 1);
EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL);
EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL);
EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL);
EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL);
EXPECT_EQ(new_lod_tensor.NumElements(0), 1UL);
EXPECT_EQ(new_lod_tensor.NumElements(1), 2UL);
EXPECT_EQ(new_lod_tensor.NumElements(2), 5UL);
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
level = 1;
new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkInLevel(level, 0, 2);
new_lod_tensor.ShrinkInLevel(level, 1, 2);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 3UL);
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
}

@@ -44,6 +44,11 @@ class OpProtoAndCheckerMaker {
var_->set_intermediate(true);
return *this;
}
VariableBuilder& AsDispensable() {
var_->set_dispensable(true);
return *this;
}
};
VariableBuilder AddInput(const std::string& name, const std::string& comment);
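A sketch of how the new flag reads at a call site; MyOpMaker and its inputs are hypothetical, and the constructor signature and header path are assumptions based on the surrounding framework code:

#include "paddle/framework/op_proto_maker.h"  // assumed header for the maker

namespace paddle {
namespace framework {

// Hypothetical maker: a dispensable input may be absent at runtime, so
// HasInput("Bias") can return false without raising an error.
class MyOpMaker : public OpProtoAndCheckerMaker {
 public:
  MyOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The main input tensor.");
    AddInput("Bias", "Optional bias tensor.").AsDispensable();
  }
};

}  // namespace framework
}  // namespace paddle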

@@ -252,5 +252,20 @@ std::ostream& operator<<(std::ostream& os,
return os;
}
bool OpSupportGPU(const std::string& op_type) {
auto& all_kernels = OperatorWithKernel::AllOpKernels();
auto it = all_kernels.find(op_type);
if (it == all_kernels.end()) {
// All control operators must support GPU
return true;
}
for (auto& kern_pair : it->second) {
if (platform::is_gpu_place(kern_pair.first.place_)) {
return true;
}
}
return false;
}
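A sketch of the intended use of OpSupportGPU; ChoosePlace is hypothetical, and the place types and header paths are assumed from paddle/platform:

#include <string>

#include "paddle/framework/operator.h"  // declares OpSupportGPU (this diff)
#include "paddle/platform/place.h"      // assumed location of the place types

namespace paddle {

// Hypothetical call site: prefer the GPU only when a GPU kernel is registered.
platform::Place ChoosePlace(const std::string& op_type, bool use_gpu) {
  if (use_gpu && framework::OpSupportGPU(op_type)) {
    return platform::GPUPlace(0);  // device 0, for illustration
  }
  return platform::CPUPlace();
}

}  // namespace paddle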
} // namespace framework
} // namespace paddle

@@ -327,37 +327,47 @@ class CompileTimeInferShapeContext : public InferShapeContext {
bool HasInput(const std::string& name) const override {
const std::vector<std::string>& input_names = op_.Input(name);
auto length = input_names.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL,
"Input(%s) should have only one value, "
"but it have %d now",
name, length);
return block_.HasVar(input_names[0]);
return block_.HasVarRecursive(input_names[0]);
}
bool HasOutput(const std::string& name) const override {
const std::vector<std::string>& output_names = op_.Output(name);
auto length = output_names.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL,
"Output(%s) should have only one value, "
"but it have %d now",
name, length);
return block_.HasVar(output_names[0]);
return block_.HasVarRecursive(output_names[0]);
}
bool HasInputs(const std::string& name) const override {
const std::vector<std::string>& input_names = op_.Input(name);
PADDLE_ENFORCE(!input_names.empty(), "Inputs(%s) length is 0", name);
if (input_names.empty()) {
return false;
}
for (auto& input : input_names) {
if (!block_.HasVar(input)) return false;
if (!block_.HasVarRecursive(input)) return false;
}
return true;
}
bool HasOutputs(const std::string& name) const override {
const std::vector<std::string>& output_names = op_.Output(name);
PADDLE_ENFORCE(!output_names.empty(), "Inputs(%s) length is 0", name);
if (output_names.empty()) {
return false;
}
for (auto& output : output_names) {
if (!block_.HasVar(output)) return false;
if (!block_.HasVarRecursive(output)) return false;
}
return true;
}
@@ -404,11 +414,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
private:
DDim GetDim(const std::string& name) const override {
return framework::make_ddim(block_.FindVar(name)->Shape());
return framework::make_ddim(block_.FindVarRecursive(name)->Shape());
}
void SetDim(const std::string& name, const DDim& dim) override {
block_.FindVar(name)->SetShape(framework::vectorize(dim));
block_.FindVarRecursive(name)->SetShape(framework::vectorize(dim));
}
const OpDescBind& op_;
@@ -421,13 +431,27 @@ class RuntimeInferShapeContext : public InferShapeContext {
: op_(op), scope_(scope) {}
bool HasInput(const std::string& name) const override {
auto ipt = op_.Input(name);
auto& ins = Inputs(name);
size_t length = ins.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Input %s should have more than one inputs",
name);
auto ipt = ins[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
}
bool HasOutput(const std::string& name) const override {
auto ipt = op_.Output(name);
auto& outs = Outputs(name);
size_t length = outs.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Output %s should have more than one inputs",
name);
auto ipt = outs[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
}
@@ -649,5 +673,7 @@ class OperatorWithKernel : public OperatorBase {
std::ostream& operator<<(std::ostream& os,
const OperatorWithKernel::OpKernelKey& kernel_key);
extern bool OpSupportGPU(const std::string& op_type);
} // namespace framework
} // namespace paddle

@@ -35,8 +35,8 @@ ProgramDesc *ProgramDescBind::Proto() {
ProgramDescBind::ProgramDescBind() {
auto *block = prog_.mutable_blocks()->Add();
block->set_idx(0);
block->set_parent_idx(-1);
block->set_idx(kRootBlockIndex);
block->set_parent_idx(kNoneBlockIndex);
blocks_.emplace_back(new BlockDescBind(this, block));
}

@@ -17,6 +17,7 @@ limitations under the License. */
#include <memory>
#include <vector>
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/platform/macros.h"
namespace paddle {

@@ -0,0 +1,26 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
namespace framework {
// The index of the first block in a program, also called the root block.
constexpr int kRootBlockIndex = 0;
// The parent index of the root block; such a parent block does not exist.
constexpr int kNoneBlockIndex = -1;
} // namespace framework
} // namespace paddle

@@ -126,7 +126,7 @@ void MKLDNNEltwiseActivation::resetFwd(Argument& act) {
copyInVal_ = nullptr;
if (act.grad && algo == algorithm::eltwise_tanh) {
// tanh needs to save the src input for backward
inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
inVal_ = MKLDNNMatrix::create(val_->getPrimitiveDesc());
copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
CHECK(copyInVal_) << "should not be empty";
pipelineFwd_.push_back(*copyInVal_);
@@ -145,7 +145,7 @@ void MKLDNNEltwiseActivation::resetBwd(Argument& act) {
algorithm algo = getAlgo(this->getName());
float alpha = getBwdAlpha();
float beta = getBeta();
grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
grad_ = MKLDNNMatrix::create(val_->getPrimitiveDesc(), act.grad);
auto eng = CPUEngine::Instance().getEngine();
auto bwdDesc = eltwise_bwd::desc(
algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
@@ -230,7 +230,7 @@ void MKLDNNActivation::resetFwd(Argument& act) {
int ic = cnt_ / bs / ih / iw;
CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw);
val_ = MKLDNNMatrix::create(
act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_);
{bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_, act.value);
CHECK(val_);
val_->downSpatial();
}

@@ -21,8 +21,8 @@ namespace paddle {
typedef enum {
MKLDNN_BASE = 1, // basic info of MKLDNN
MKLDNN_TESTS = 1, // gtest info of MKLDNN
MKLDNN_SIZES = 2, // size info of MKLDNN
MKLDNN_FMTS = 3, // format info of MKLDNN
MKLDNN_FMTS = 2, // format info of MKLDNN
MKLDNN_SIZES = 3, // size info of MKLDNN
MKLDNN_ALL = 4, // show all info of MKLDNN
} MKLDNN_LOG_LEVEL;
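These levels gate glog's VLOG verbosity, so after this swap -v 2 prints format flow and -v 3 additionally prints sizes. Illustrative fragments only (wgt, ic, ih, iw stand for the layer members used in the real code):

VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
VLOG(MKLDNN_SIZES) << "ic: " << ic << ", ih: " << ih << ", iw: " << iw;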

File diff suppressed because it is too large.

@@ -48,17 +48,6 @@ protected:
// save forward primitive_desc, which can be used backward
std::shared_ptr<conv_fwd::primitive_desc> fwdPD_;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr cpuInVal_;
MKLDNNMatrixPtr cpuInGrad_;
MKLDNNMatrixPtr cpuOutVal_;
MKLDNNMatrixPtr cpuOutGrad_;
// convert handle between CPU device and MKLDNN device
std::shared_ptr<mkldnn::reorder> cvtInVal_;
std::shared_ptr<mkldnn::reorder> cvtInGrad_;
std::shared_ptr<mkldnn::reorder> cvtOutVal_;
std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
// whether the weight has been init
bool hasInitedWgt_;
@@ -94,8 +83,6 @@ public:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateInputData() override;
void updateWeights(const UpdateCallback& callback) override;
void convertWeightsFromPaddle() override;
@@ -109,26 +96,6 @@ public:
<< ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_;
}
void printValueFormatFlow() override {
if (cpuInVal_) {
VLOG(MKLDNN_FMTS) << cpuInVal_->getFormat() << " >>>";
}
MKLDNNLayer::printValueFormatFlow();
if (cpuOutVal_) {
VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat();
}
}
void printGradFormatFlow() override {
if (cpuInGrad_) {
VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<";
}
MKLDNNLayer::printGradFormatFlow();
if (cpuOutGrad_) {
VLOG(MKLDNN_FMTS) << " <<< " << cpuOutGrad_->getFormat();
}
}
protected:
/**
* load the dims settings of this conv
@@ -162,23 +129,6 @@ protected:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* reset MKLDNNMatrix of input value
*/
void resetInValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in);
/**
* reset MKLDNNMatrix of weight and bias value
*/
void resetWgtBiasValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias);
/**
* reset MKLDNNMatrix of output value
*/
void resetOutValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& out);
/**
* reset the backward weight primitive descriptor.
*/
@@ -207,22 +157,6 @@ protected:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* reset MKLDNNMatrix of output grad
*/
void resetOutGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
MKLDNNMatrixPtr& out);
/**
* reset MKLDNNMatrix of weight and bias grad
*/
void resetWgtBiasGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias);
/**
* reset MKLDNNMatrix of input grad
*/
void resetInGrad(std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in);
/**
* reset MKLDNNMatrix of weight value for backward data
* since the primitive_desc would be different from wgtVal_

@@ -62,7 +62,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
CHECK(wgtVal_) << "should have been initialized";
bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
auto targetDim = wgtVal_->getDims();
auto srcFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
auto srcFmt = hasNoSpatial_ ? format::io : format::ihwo;
wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim);
hasInitedWgt_ = true;
}
@@ -71,7 +71,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
CHECK(wgtVal_) << "should have been initialized";
bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
auto targetDim = wgtVal_->getDims();
auto dstFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
auto dstFmt = hasNoSpatial_ ? format::io : format::ihwo;
wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
}
@@ -100,8 +100,6 @@ void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD(fwdPD_, in, wgt, bias, out);
resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
printValueFormatFlow();
}
void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -119,12 +117,6 @@ void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdDataPD(bwdDataPD, in, out);
resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
printGradFormatFlow();
}
void MKLDNNFcLayer::updateInputData() {
inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
}
void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
@@ -139,51 +131,30 @@ void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
resetInValue(in);
resetWgtBiasValue(wgt, bias);
resetOutValue(out);
}
void MKLDNNFcLayer::resetInValue(MKLDNNMatrixPtr& in) {
if (inputIsOnlyMKLDNN()) {
const MatrixPtr& dnnIn = getInputValue(0);
in = std::dynamic_pointer_cast<MKLDNNMatrix>(dnnIn);
CHECK(in) << "Input should be MKLDNNMatrix";
} else {
CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
in = MKLDNNMatrix::create(
cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_);
}
CHECK(in);
in->downSpatial();
}
void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias) {
auto outPD =
MKLDNNMatrix::createPrimitiveDesc({bs_, oc_}, format::nc, engine_);
resetOutValue(out, outPD);
format wgtFmt = format::oihw;
if (inVal_->getFormat() == format::nChw8c) {
if (in->getFormat() == format::nChw8c) {
wgtFmt = format::oIhw8i;
} else if (inVal_->getFormat() == format::nChw16c) {
} else if (in->getFormat() == format::nChw16c) {
wgtFmt = format::oIhw16i;
}
wgt = MKLDNNMatrix::create(
weight_->getW(), {oc_, ic_, ih_, iw_}, wgtFmt, engine_);
auto wgtPD =
MKLDNNMatrix::createPrimitiveDesc({oc_, ic_, ih_, iw_}, wgtFmt, engine_);
resetWithMatrix(wgt, weight_->getW(), wgtPD);
wgt->downSpatial();
VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
bias = (biases_ && biases_->getW())
? MKLDNNMatrix::create(biases_->getW(), {oc_}, format::x, engine_)
: nullptr;
}
void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
if (!outputIsOnlyMKLDNN()) {
// fc cpu output value do not need create convert, just share data
getOutput(CPU_DEVICE).value->setData(out->getData());
if (biases_ && biases_->getW()) {
auto biasPD = MKLDNNMatrix::createPrimitiveDesc({oc_}, format::x, engine_);
resetWithMatrix(bias, biases_->getW(), biasPD);
} else {
bias = nullptr;
}
output_.value = std::dynamic_pointer_cast<Matrix>(out);
}
void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
@@ -219,7 +190,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
} else {
fwd_.reset(new fc_fwd(*pd, *in, *wgt, *out));
}
pipeline.push_back(*fwd_);
}
@@ -227,44 +197,18 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
resetOutGrad(out);
resetWgtBiasGrad(wgt, bias);
resetInGrad(in);
}
void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
CHECK(outVal_);
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
output_.grad->setData(cpuOut->getData());
out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
}
}
CHECK(inVal_ && outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
resetInGrad(in, inVal_->getPrimitiveDesc());
void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias) {
CHECK(wgtVal_);
wgt = MKLDNNMatrix::create(weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
if (biasVal_) {
resetWithMatrix(bias, biases_->getWGrad(), biasVal_->getPrimitiveDesc());
} else {
bias = nullptr;
if (biasVal_ == nullptr) {
return;
}
bias =
MKLDNNMatrix::create(biases_->getWGrad(), biasVal_->getPrimitiveDesc());
}
void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr;
if (inputLayers_[0]->getOutput().grad == nullptr) {
return;
}
CHECK(inVal_);
MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
}
void MKLDNNFcLayer::resetBwdWgtPD(

@@ -66,8 +66,6 @@ public:
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateInputData() override;
void updateWeights(const UpdateCallback& callback) override;
void convertWeightsFromPaddle() override;
@@ -84,9 +82,6 @@ protected:
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
void resetInValue(MKLDNNMatrixPtr& in);
void resetWgtBiasValue(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias);
void resetOutValue(MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr in,
MKLDNNMatrixPtr wgt,
@@ -109,9 +104,6 @@ protected:
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
void resetOutGrad(MKLDNNMatrixPtr& out);
void resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias);
void resetInGrad(MKLDNNMatrixPtr& in);
void resetBwdWgtPD(std::shared_ptr<fc_bwdWgt::primitive_desc>& pd,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@@ -85,8 +85,6 @@ void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD(fwdPD_, in, out);
resetFwdPipeline(pipeline, fwdPD_, in, out);
printValueFormatFlow();
}
void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -101,65 +99,22 @@ void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdPD(pd, in, out);
resetBwdPipeline(pipeline, pd, in, out);
printGradFormatFlow();
}
void MKLDNNPoolLayer::updateInputData() {
inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
}
void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
resetInValue(in);
resetOutValue(out);
}
void MKLDNNPoolLayer::resetInValue(MKLDNNMatrixPtr& in) {
if (inputIsOnlyMKLDNN()) {
const MatrixPtr& dnnIn = getInputValue(0);
in = std::dynamic_pointer_cast<MKLDNNMatrix>(dnnIn);
CHECK(in) << "Input should be MKLDNNMatrix";
} else {
CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
in = MKLDNNMatrix::create(
cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_);
}
}
void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
CHECK(inVal_) << "Should reset input value first";
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
out = MKLDNNMatrix::create(
output_.value, outDims, inVal_->getFormat(), engine_);
// create reorder if output value has cpu device and pd do not match
cpuOutVal_ = nullptr;
cvtOutVal_ = nullptr;
if (!outputIsOnlyMKLDNN()) {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be empty";
} else {
cpuOut->setData(output_.value->getData());
cpuOutVal_ = out;
}
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
}
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
CHECK(in);
auto outPD =
MKLDNNMatrix::createPrimitiveDesc(outDims, in->getFormat(), engine_);
resetOutValue(out, outPD);
}
void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr in,
MKLDNNMatrixPtr out) {
memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
memory::dims kernels = memory::dims{fh_, fw_};
memory::dims strides = memory::dims{sh_, sw_};
memory::dims padL = memory::dims{ph_, pw_};
@@ -194,58 +149,26 @@ void MKLDNNPoolLayer::resetFwdPipeline(
? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
: std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
pipeline.push_back(*fwd_);
if (cvtOutVal_) {
pipeline.push_back(*cvtOutVal_);
}
}
void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
resetOutGrad(out);
resetInGrad(in);
}
void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr;
CHECK(outVal_);
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_.grad->setData(cpuOut->getData());
cpuOutGrad_ = MKLDNNMatrix::create(
cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_) << "should not be empty";
} else {
out = cpuOutGrad_;
}
}
}
void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr;
if (inputLayers_[0]->getOutput().grad == nullptr) {
return;
}
CHECK(inVal_);
MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
CHECK(inVal_ && outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
resetInGrad(in, inVal_->getPrimitiveDesc());
}
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
pd = nullptr;
if (in == nullptr) {
return;
}
memory::dims kernels = memory::dims{fh_, fw_};
memory::dims strides = memory::dims{sh_, sw_};
memory::dims padL = memory::dims{ph_, pw_};
memory::dims padR = getPaddingR();
CHECK(in);
CHECK(out);
auto bwdDesc = pool_bwd::desc(poolAlgo_,
in->getMemoryDesc(),
@@ -263,8 +186,8 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_);
if (pd == nullptr) {
return;
}
bwdData_ =

Some files were not shown because too many files have changed in this diff.
