Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop
commit d5f791815b
(Four file diffs suppressed because they are too large.)
@@ -0,0 +1,243 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h"
#include <algorithm>
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"

#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/platform/cpu_info.h"

namespace paddle {
namespace framework {
namespace ir {

static int BuildFusion(Graph* graph, const std::string& name_scope,
                       Scope* scope, bool with_fc_bias) {
  GraphPatternDetector gpd;
  auto* pattern = gpd.mutable_pattern();

  // Build pattern
  PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x"))
                  ->assert_is_op_input("lookup_table")
                  ->assert_var_not_persistable();
  patterns::Embedding embedding_pattern(pattern, name_scope);
  // TODO(jczaja): Intermediate can only be used for values that are not used
  //               anywhere else, but the lookup table output may go into
  //               another LSTM (for the reverse direction).
  auto* embedding_out = embedding_pattern(x);
  patterns::FC fc_pattern(pattern, name_scope);

  // fc_out is a tmp var, will be removed after fuse, so marked as intermediate.
  auto* fc_out = fc_pattern(embedding_out, with_fc_bias)->AsIntermediate();
  patterns::LSTM lstm_pattern(pattern, name_scope);
  lstm_pattern(fc_out);

  // Create new OpDesc
  auto embedding_lstm_creator = [&](Node* embedding, Node* W, Node* lstm,
                                    Node* input, Node* weight_x, Node* weight_h,
                                    Node* bias, Node* hidden, Node* cell,
                                    Node* xx, Node* fc_bias) {
    OpDesc op_desc;
    op_desc.SetType("fused_embedding_fc_lstm");
#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
    SET_IN(Ids, input);
    SET_IN(WeightH, weight_h);
    // Need to pass this because we need the Wc data for peephole connections.
    SET_IN(Bias, bias);
#undef SET_IN
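    // Note: WeightX is intentionally not passed to the fused op here; its
    // effect is folded into the precomputed "Embeddings" input built below.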

    // Multiply embeddings with Weights
    PADDLE_ENFORCE(scope);
    const std::string& embeddings = patterns::UniqueKey("Embeddings");
    auto* embeddings_var = scope->Var(embeddings);
    PADDLE_ENFORCE(embeddings_var);
    auto* embeddings_tensor =
        embeddings_var->GetMutable<framework::LoDTensor>();
    // Get WeightX size: [single_embedding, fc_size]
    // and embedding size: [dict_size, single_embedding]
    // and create the new embeddings size, e.g. [dict_size, hidden_size]
    auto* embedding_var = scope->FindVar(W->Name());
    PADDLE_ENFORCE(embedding_var);
    const auto& embedding_tensor = embedding_var->Get<framework::LoDTensor>();

    const auto& weightx_tensor =
        scope->FindVar(weight_x->Name())->Get<framework::LoDTensor>();
    embeddings_tensor->Resize(
        {embedding_tensor.dims()[0], weightx_tensor.dims()[1]});

    // Multiply embeddings by WeightX and add the bias
    auto embedding_data = embedding_tensor.data<float>();
    auto weightx_data = weightx_tensor.data<float>();
    auto embeddings_data =
        embeddings_tensor->mutable_data<float>(platform::CPUPlace());

    // Biases to be added to the upcoming GEMM result
    auto* lstm_bias_var = scope->FindVar(bias->Name());
    PADDLE_ENFORCE(lstm_bias_var);
    const auto& lstm_bias_tensor = lstm_bias_var->Get<framework::LoDTensor>();

    auto alpha = 1.0f;
    auto beta = 1.0f;
    int m = embedding_tensor.dims()[0];
    int n = weightx_tensor.dims()[1];
    int k = embedding_tensor.dims()[1];
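    // GEMM dimensions: m = dictionary size (rows of the embedding table),
    // k = width of a single embedding, n = number of columns of WeightX.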

    // Copy only the gate bias values (only the actual bias data, not the
    // peephole weights)
    std::vector<float> combined_biases;
    combined_biases.reserve(n);
    std::copy_n(lstm_bias_tensor.data<float>(), n,
                std::back_inserter(combined_biases));

    if (with_fc_bias) {
      // Add the FC bias to the LSTM bias (both are folded into the GEMM
      // result below).
      auto* fc_bias_var = scope->FindVar(fc_bias->Name());
      const auto& fc_bias_tensor = fc_bias_var->Get<framework::LoDTensor>();
      for (int i = 0; i < fc_bias_tensor.numel(); i++) {
        combined_biases[i] += fc_bias_tensor.data<float>()[i];
      }
    }

    // Broadcast the biases
    std::vector<float> ones(m, 1.0f);
    paddle::operators::math::CBlas<float>::GEMM(
        CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, 1, alpha, &ones[0], 1,
        &combined_biases[0], n, 0.0f, embeddings_data, n);
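    // The GEMM above is a rank-1 product used purely for broadcasting: the
    // [m x 1] vector of ones times the [1 x n] combined bias row (with
    // beta = 0) fills every row of embeddings_data with combined_biases.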

    // Wx*embeddings + biases
    paddle::operators::math::CBlas<float>::GEMM(
        CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
        embedding_data, k, weightx_data, n, beta, embeddings_data, n);
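    // embeddings_data now holds embedding * WeightX + bias, i.e. the FC is
    // pre-applied to every dictionary row, so the fused op can replace the
    // lookup_table + FC pair with a single row lookup into this table.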
    op_desc.SetInput("Embeddings", {embeddings});

    // Create temp variables.
    const std::string BatchedInput = patterns::UniqueKey("BatchedInput");
    const std::string BatchedCellPreAct =
        patterns::UniqueKey("BatchedCellPreAct");
    const std::string BatchedGate = patterns::UniqueKey("BatchedGate");

    scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>();
    scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>();
    scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>();

    op_desc.SetInput("H0", {});
    op_desc.SetInput("C0", {});
    op_desc.SetOutput("Hidden", {hidden->Name()});
    op_desc.SetOutput("Cell", {cell->Name()});
    op_desc.SetOutput("XX", {xx->Name()});
    op_desc.SetOutput("BatchedGate", {BatchedGate});
    op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct});
    op_desc.SetOutput("BatchedInput", {BatchedInput});
    op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
    op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
    // TODO(TJ): get from attr
    op_desc.SetAttr("use_seq", true);

    PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
    auto* scope = graph->Get<Scope*>(kParamScopeAttr);
#define OP_SET_OUT(x)                            \
  const std::string x = patterns::UniqueKey(#x); \
  op_desc.SetOutput(#x, {x});                    \
  scope->Var(x)->GetMutable<LoDTensor>()
    OP_SET_OUT(BatchedCell);
    OP_SET_OUT(BatchedHidden);
    OP_SET_OUT(ReorderedH0);
    OP_SET_OUT(ReorderedC0);
#undef OP_SET_OUT

    auto* op = graph->CreateOpNode(&op_desc);
    IR_NODE_LINK_TO(input, op);
    IR_NODE_LINK_TO(weight_x, op);
    IR_NODE_LINK_TO(weight_h, op);
    IR_NODE_LINK_TO(bias, op);
    IR_NODE_LINK_TO(op, hidden);
    return op;
  };

  int fusion_count{0};

  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Hidden, Hidden, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(lookup_table, lookup_table, embedding_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(W, W, embedding_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);

    // TODO(jczaja): Add support for is_sparse / is_distributed
    auto is_sparse = boost::get<bool>(lookup_table->Op()->GetAttr("is_sparse"));
    auto is_distributed =
        boost::get<bool>(lookup_table->Op()->GetAttr("is_distributed"));

    if (is_sparse == true || is_distributed == true) {
      return;
    }

    if (with_fc_bias) {
      GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
      GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
      GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
      embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
                             Bias, Hidden, Cell, fc_out, fc_bias);
      // Remove unneeded nodes.
      // TODO(jczaja): Proper removing of lookup table
      std::unordered_set<const Node*> marked_nodes(
          //{lookup_table, mul, lstm, elementwise_add, fc_bias, W});
          {mul, lstm, elementwise_add, fc_bias});
      GraphSafeRemoveNodes(graph, marked_nodes);
    } else {
      GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);
      embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
                             Bias, Hidden, Cell, fc_out, nullptr);
      // Remove unneeded nodes.
      // TODO(jczaja): Proper removing of lookup table
      // std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul,
      // lstm});
      std::unordered_set<const Node*> marked_nodes({mul, lstm});
      GraphSafeRemoveNodes(graph, marked_nodes);
    }

    ++fusion_count;
  };

  gpd(graph, handler);

  return fusion_count;
}

std::unique_ptr<ir::Graph> EmbeddingFCLSTMFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  FusePassBase::Init(name_scope_, graph.get());

  int fusion_count = BuildFusion(graph.get(), name_scope_, param_scope(),
                                 true /*with_fc_bias*/);

  AddStatis(fusion_count);
  return graph;
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(embedding_fc_lstm_fuse_pass,
              paddle::framework::ir::EmbeddingFCLSTMFusePass);
@@ -0,0 +1,167 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <vector>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/string/pretty_log.h"

namespace paddle {
namespace framework {

// This code could be shared with Executor.
static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
  if (var_type == proto::VarType::LOD_TENSOR) {
    var->GetMutable<LoDTensor>();
  } else if (var_type == proto::VarType::SELECTED_ROWS) {
    var->GetMutable<SelectedRows>();
  } else if (var_type == proto::VarType::FEED_MINIBATCH) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::STEP_SCOPES) {
    var->GetMutable<std::vector<framework::Scope>>();
  } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
    var->GetMutable<LoDTensorArray>();
  } else if (var_type == proto::VarType::PLACE_LIST) {
    var->GetMutable<platform::PlaceList>();
  } else if (var_type == proto::VarType::READER) {
    var->GetMutable<ReaderHolder>();
  } else if (var_type == proto::VarType::RAW) {
    // GetMutable will be called in operator
  } else {
    PADDLE_THROW(
        "Variable type %d is not in "
        "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
        "LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
        var_type);
  }
}

void NaiveExecutor::Prepare(Scope *parent_scope,
                            const ProgramDesc &program_desc, int block_id,
                            bool with_feed_fetch_ops) {
  if (!parent_scope) {
    scope_ = new framework::Scope;
  } else {
    scope_ = &parent_scope->NewScope();
  }
  CreateVariables(program_desc, scope_, block_id);
  CreateOps(program_desc, block_id, with_feed_fetch_ops);
}

void NaiveExecutor::Run() {
  for (auto &op : ops_) {
    VLOG(4) << "run " << op->Type();
    op->Run(*scope_, place_);
  }
}

void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
                                    int block_id) {
  PADDLE_ENFORCE(scope);
  auto &global_block = desc.Block(block_id);

  const Scope *ancestor_scope = scope;
  while (ancestor_scope->parent()) {
    ancestor_scope = ancestor_scope->parent();
  }

  if (ancestor_scope != scope) {
    for (auto &var : global_block.AllVars()) {
      if (var->Name() == framework::kEmptyVarName) {
        continue;
      }
      // Create persistable vars in ancestor scope.
      if (var->Persistable()) {
        auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
        InitializeVariable(ptr, var->GetType());
        VLOG(3) << "Create Variable " << var->Name()
                << " global, which pointer is " << ptr;
      } else {  // Create temporary variables in local scope.
        auto *ptr = scope->Var(var->Name());
        InitializeVariable(ptr, var->GetType());
        VLOG(3) << "Create Variable " << var->Name()
                << " locally, which pointer is " << ptr;
      }
    }
  } else {
    for (auto &var : global_block.AllVars()) {
      auto *ptr = scope->Var(var->Name());
      InitializeVariable(ptr, var->GetType());
      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
              << ptr;
    }
  }
}

void NaiveExecutor::CreateOps(const ProgramDesc &desc, int block_id,
                              bool with_feed_fetch_ops) {
  for (const auto &op_desc : desc.Block(block_id).AllOps()) {
    if (!with_feed_fetch_ops &&
        (op_desc->Type() == "feed" || op_desc->Type() == "fetch")) {
      string::PrettyLogEndl(string::Style::detail(), "--- skip [%s], %s -> %s",
                            op_desc->Input("X")[0], op_desc->Type(),
                            op_desc->Output("Out")[0]);
      continue;
    }
    ops_.emplace_back(OpRegistry::CreateOp(*op_desc));
  }
}
LoDTensor *NaiveExecutor::FindTensor(const std::string &name) {
  PADDLE_ENFORCE(scope_, "Need to init scope first");
  auto *var = scope_->FindVar(name);
  PADDLE_ENFORCE(var, "No variable [%s] in the scope", name);
  auto *tensor = const_cast<LoDTensor *>(&var->Get<LoDTensor>());
  return tensor;
}
void NaiveExecutor::CleanFeedFetchOps() {
  std::vector<std::unique_ptr<OperatorBase>> ops;
  for (auto &op : ops_) {
    if (op->Type() != "feed" && op->Type() != "fetch") {
      ops.emplace_back(std::move(op));
    }
  }
  ops_.swap(ops);
}

void NaiveExecutor::EnableMKLDNN(const ProgramDesc &program) {
#ifdef PADDLE_WITH_MKLDNN
  VLOG(3) << "use_mkldnn=True";
  for (size_t block_id = 0; block_id < program.Size(); ++block_id) {
    auto *block = const_cast<ProgramDesc &>(program).MutableBlock(block_id);
    for (auto *op : block->AllOps()) {
      if (op->HasAttr("use_mkldnn")) {
        op->SetAttr("use_mkldnn", true);
      }
    }
  }
#else
  LOG(WARNING)
      << "'MKLDNN' is not supported. Please re-compile with the WITH_MKLDNN "
         "option.";
#endif
}

}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,67 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace framework {

/*
 * Simple, intuitive and effective. Only a single thread is supported, and it
 * is currently designed for inference.
 */
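//
// Typical call sequence (mirrored by naive_executor_test.cc below): construct
// with a place, Prepare() a scope for the program, fill the input tensors
// obtained via FindTensor(), then Run() and read the outputs back through
// FindTensor().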
class NaiveExecutor {
 public:
  explicit NaiveExecutor(const platform::Place& place) : place_(place) {}

  // Create child scope.
  // Create variables.
  // @with_feed_fetch_ops: whether to work with the feed and fetch operators.
  void Prepare(Scope* parent_scope, const ProgramDesc& program_desc,
               int block_id, bool with_feed_fetch_ops);

  // Run all the operators.
  void Run();

  // Get a tensor to operate on directly, without the need for feed ops.
  LoDTensor* FindTensor(const std::string& name);

  Scope* scope() { return scope_; }

  void CleanFeedFetchOps();

  void EnableMKLDNN(const ProgramDesc& program);

 protected:
  void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id);

  void CreateOps(const ProgramDesc& desc, int block_id,
                 bool with_feed_fetch_ops);

 private:
  const platform::Place place_;
  // Cache the required resources to avoid recreating them.
  std::vector<std::unique_ptr<OperatorBase>> ops_;
  Scope* scope_;
};

}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,70 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/naive_executor.h"
#include <gtest/gtest.h>
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"

namespace paddle {
namespace framework {

TEST(NaiveExecutor, Basic) {
  ProgramDesc program;
  auto* main_block = program.MutableBlock(0);
  auto* a = main_block->Var("a");  // input
  auto* b = main_block->Var("b");  // input
  auto* c = main_block->Var("c");  // output
  a->SetType(proto::VarType::LOD_TENSOR);
  b->SetType(proto::VarType::LOD_TENSOR);
  c->SetType(proto::VarType::LOD_TENSOR);

  auto* add = main_block->AppendOp();
  add->SetType("elementwise_add");
  add->SetInput("X", {"a"});
  add->SetInput("Y", {"b"});
  add->SetOutput("Out", {"c"});

  auto place = platform::CPUPlace();
  NaiveExecutor exe(place);
  exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);
  auto* a_tensor = exe.FindTensor("a");
  auto* b_tensor = exe.FindTensor("b");
  auto* c_tensor = exe.FindTensor("c");

  a_tensor->Resize({1, 4});
  b_tensor->Resize({1, 4});
  c_tensor->Resize({1, 4});
  b_tensor->mutable_data<float>(place);
  a_tensor->mutable_data<float>(place);

  float a_arr[] = {0, 1, 2, 3};
  float b_arr[] = {0.0, .1, .2, .3};

  std::copy_n(a_arr, 4, a_tensor->mutable_data<float>(place));
  std::copy_n(b_arr, 4, b_tensor->mutable_data<float>(place));

  exe.Run();

  auto* c_data = c_tensor->mutable_data<float>(place);
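  // elementwise_add computes c = a + b, so each c[i] should be i + 0.1f * i,
  // i.e. 1.1f * i, within the EXPECT_NEAR tolerance.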
  for (int i = 0; i < 4; i++) {
    EXPECT_NEAR(c_data[i], 1.1 * i, 1e-3);
  }
}

}  // namespace framework
}  // namespace paddle

USE_OP(elementwise_add);
Some files were not shown because too many files have changed in this diff.