Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add-async-ssa-graph-executor

mixed_precision_init
Qiao Longfei 6 years ago
commit e70b1727ef

@ -42,12 +42,6 @@ repos:
entry: bash ./tools/codestyle/pylint_pre_commit.hook
language: system
files: \.(py)$
- repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
hooks:
- id: go-fmt
types:
- go
- repo: local
hooks:
- id: copyright_checker

@ -39,8 +39,10 @@ IF(WIN32)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
ELSE()
SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
ELSE()
#TODO(intel-huying):
# Now enable Erf function in mklml library temporarily, it will be updated as offical version later.
SET(MKLML_VER "Glibc225_vsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)

@ -37,7 +37,7 @@ INCLUDE(GNUInstallDirs)
INCLUDE(ExternalProject)
SET(NGRAPH_PROJECT "extern_ngraph")
SET(NGRAPH_GIT_TAG "20bd8bbc79ae3a81c57313846a2be7313e5d1dab")
SET(NGRAPH_GIT_TAG "a444f7a959b7d87f2c117c9b57a4c387759e481e")
SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)
@ -69,7 +69,7 @@ ExternalProject_Add(
CMAKE_ARGS -DNGRAPH_DEX_ONLY=TRUE
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}
CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
)

@ -153,7 +153,11 @@ function(op_library TARGET)
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len)
if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0)
if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
else()
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
endif()
# pybind USE_OP_DEVICE_KERNEL for MIOPEN
@ -168,6 +172,9 @@ function(op_library TARGET)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n")
else()
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
endif()

File diff suppressed because it is too large Load Diff

@ -13,7 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/block_desc.h"
#include <queue>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
@ -155,6 +159,16 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
ops_.erase(ops_.begin() + s, ops_.begin() + e);
}
void BlockDesc::RemoveOpInternal(const OpDesc *op_desc) {
// TODO(minqiyang): make this faster
for (auto it = ops_.begin(); it != ops_.end(); ++it) {
if (it->get() == op_desc) {
ops_.erase(it);
break;
}
}
}
std::vector<OpDesc *> BlockDesc::AllOps() const {
std::vector<OpDesc *> res;
for (const auto &op : ops_) {
@ -163,20 +177,6 @@ std::vector<OpDesc *> BlockDesc::AllOps() const {
return res;
}
void BlockDesc::Clear() {
// clear all ops
ops_.clear();
// clear all vars which are not persistable
for (auto it = vars_.begin(); it != vars_.end();) {
if (it->second->Persistable()) {
++it;
} else {
vars_.erase(it++);
}
}
}
void BlockDesc::Flush() {
for (auto &op_desc : ops_) {
op_desc->Flush();

@ -93,12 +93,12 @@ class BlockDesc {
*/
void RemoveOp(size_t s, size_t e);
void RemoveOpInternal(const OpDesc *op_desc);
void RemoveVar(const std::string &name) { vars_.erase(name); }
std::vector<OpDesc *> AllOps() const;
void Clear();
size_t OpSize() const { return ops_.size(); }
OpDesc *Op(int idx) const { return ops_.at(idx).get(); }

@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/details/fetch_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
@ -55,7 +57,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
std::vector<FetchOpHandle *> fetch_ops;
for (auto &fetch_var_name : fetch_tensors) {
for (auto &var_map : graph_->Get<details::GraphVars>("vars")) {
for (auto &var_map : graph_->Get<details::GraphVars>(details::kGraphVars)) {
auto it = var_map.find(fetch_var_name);
if (it != var_map.end()) {
fetched_vars[fetch_var_name].push_back(*it->second.rbegin());

@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/distributed/distributed.h"

@ -105,4 +105,5 @@ if (WITH_MKLDNN)
cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor)
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass)
endif ()

@ -44,10 +44,14 @@ struct TestIsReachable {
using func = std::function<bool(const std::string&, const std::string&)>;
auto operator()(const std::unique_ptr<ir::Graph>& graph) -> func {
auto find_node = [](const std::unique_ptr<ir::Graph>& graph,
const std::string& name) -> Node* {
auto hash = [](const Node* node) -> std::string {
return node->Name() + std::to_string(node->id());
};
auto find_node = [&](const std::unique_ptr<ir::Graph>& graph,
const std::string& name) -> Node* {
for (auto& node : GraphTraits::DFS(*graph)) {
if (name == node.Name()) {
if (name == hash(&node)) {
return &node;
}
}
@ -55,13 +59,17 @@ struct TestIsReachable {
return nullptr;
};
return [&](std::string from, const std::string to) -> bool {
// update the from and to strings to hashed equivs in loop from graph traits
return [&](std::string from, std::string to) -> bool {
if (from == to) return true;
std::map<std::string, bool> visited;
for (auto& node : GraphTraits::DFS(*graph)) {
visited[node.Name()] = false;
auto hashed = hash(&node);
if (node.Name() == from) from = hashed;
if (node.Name() == to) to = hashed;
visited[hashed] = false;
}
visited[from] = true;
@ -72,15 +80,15 @@ struct TestIsReachable {
while (!queue.empty()) {
auto cur = find_node(graph, queue.front());
queue.pop_front();
if (cur == nullptr) return false;
for (auto n : cur->outputs) {
if (n->Name() == to) return true;
auto hashed_name = hash(n);
if (hashed_name == to) return true;
if (!visited[n->Name()]) {
visited[n->Name()] = true;
queue.push_back(n->Name());
if (!visited[hashed_name]) {
visited[hashed_name] = true;
queue.push_back(hashed_name);
}
}
}
@ -166,6 +174,28 @@ TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsYWithElementwiseAddRelu) {
RunPassAndAssert(&prog, "a", "relu", 1);
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionProjectionAsYWithElementwiseAddRelu) {
auto prog = BuildProgramDesc({"a", "b", "c", "d", "e", "f"},
{"bias", "weights", "bias2", "weights2"});
SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"});
// right branch
SetOp(&prog, "conv2d",
{{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
{"Output", "c"});
// left branch
SetOp(&prog, "conv2d",
{{"Input", "a"}, {"Bias", "bias2"}, {"Filter", "weights2"}},
{"Output", "f"});
SetOp(&prog, "elementwise_add", {{"X", "f"}, {"Y", "c"}}, {"Out", "d"});
SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});
RunPassAndAssert(&prog, "a", "relu", 2);
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionAsYWithElementwiseAddReluNoBias) {
auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});

@ -21,7 +21,7 @@ namespace ir {
std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
VLOG(3) << "Aplies MKL-DNN placement strategy.";
VLOG(3) << "Applies MKL-DNN placement strategy.";
const auto& op_types_list =
Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
for (const Node* n : graph->Nodes()) {

@ -0,0 +1,136 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
#include <gtest/gtest.h>
#include <boost/logic/tribool.hpp>
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, boost::tribool use_mkldnn) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
if (!boost::indeterminate(use_mkldnn)) op->SetAttr("use_mkldnn", use_mkldnn);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
op->SetInput("Filter", {inputs[1]});
op->SetInput("Bias", {inputs[2]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator use_mkldnn
// ---------------------------------------
// (a,b)->concat->c none
// (c,weights,bias)->conv->f none
// f->relu->g false
// g->pool->h false
// (h,weights2,bias2)->conv->k true
// k->relu->l true
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g",
"h", "weights2", "bias2", "k", "l"})) {
auto* var = prog.MutableBlock(0)->Var(v);
var->SetType(proto::VarType::SELECTED_ROWS);
if (v == "weights" || v == "bias") {
var->SetPersistable(true);
}
}
SetOp(&prog, "concat", "concat1", std::vector<std::string>({"a", "b"}),
std::vector<std::string>({"c"}), boost::indeterminate);
SetOp(&prog, "conv2d", "conv1",
std::vector<std::string>({"c", "weights", "bias"}),
std::vector<std::string>({"f"}), boost::indeterminate);
SetOp(&prog, "relu", "relu1", std::vector<std::string>({"f"}),
std::vector<std::string>({"g"}), false);
SetOp(&prog, "pool2d", "pool1", std::vector<std::string>({"g"}),
std::vector<std::string>({"h"}), false);
SetOp(&prog, "conv2d", "conv2",
std::vector<std::string>({"h", "weights2", "bias2"}),
std::vector<std::string>({"k"}), true);
SetOp(&prog, "relu", "relu2", std::vector<std::string>({"k"}),
std::vector<std::string>({"l"}), true);
return prog;
}
void MainTest(std::initializer_list<std::string> mkldnn_enabled_op_types,
unsigned expected_use_mkldnn_true_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("mkldnn_placement_pass");
pass->Set("mkldnn_enabled_op_types",
new std::unordered_set<std::string>(mkldnn_enabled_op_types));
graph = pass->Apply(std::move(graph));
unsigned use_mkldnn_true_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->HasAttr("use_mkldnn") &&
boost::get<bool>(op->GetAttr("use_mkldnn"))) {
++use_mkldnn_true_count;
}
}
}
EXPECT_EQ(use_mkldnn_true_count, expected_use_mkldnn_true_count);
}
TEST(MKLDNNPlacementPass, enable_conv_relu) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 0 pool
MainTest({"conv2d", "relu"}, 3);
}
TEST(MKLDNNPlacementPass, enable_relu_pool) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool
MainTest({"relu", "pool2d"}, 4);
}
TEST(MKLDNNPlacementPass, enable_all) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool
MainTest({}, 4);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(mkldnn_placement_pass);

@ -290,7 +290,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
"USE_OP_DEVICE_KERNEL must be in global namespace"); \
extern int \
TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name(); \
UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##DEFAULT_TYPE##_ = /* NOLINT */ \
UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##customized_name##_ = /* NOLINT */ \
TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name()
#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \

@ -882,7 +882,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
const RuntimeContext& ctx_;
};
static void CheckTensorNANOrInf(const std::string& name,
static void CheckTensorNANOrInf(const std::string& op_type,
const std::string& name,
const framework::Tensor& tensor) {
if (tensor.memory_size() == 0) {
return;
@ -892,9 +893,9 @@ static void CheckTensorNANOrInf(const std::string& name,
return;
}
PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
"Tensor %s contains Inf", name);
"Operator %s output Tensor %s contains Inf", op_type, name);
PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor),
"Tensor %s contains NAN", name);
"Operator %s output Tensor %s contains NAN", op_type, name);
}
void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
@ -988,9 +989,10 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
auto* var = exec_scope.FindVar(vname);
if (var == nullptr) continue;
if (var->IsType<framework::LoDTensor>()) {
CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
CheckTensorNANOrInf(type_, vname, var->Get<framework::LoDTensor>());
} else if (var->IsType<framework::SelectedRows>()) {
CheckTensorNANOrInf(vname, var->Get<framework::SelectedRows>().value());
CheckTensorNANOrInf(type_, vname,
var->Get<framework::SelectedRows>().value());
}
}
}

@ -24,3 +24,11 @@ limitations under the License. */
#pragma pop_macro("_XOPEN_SOURCE")
#pragma pop_macro("_POSIX_C_SOURCE")
#if !defined(PYBIND11_HIDDEN)
#ifdef _WIN32
#define PYBIND11_HIDDEN __declspec(dllexport)
#else
#define PYBIND11_HIDDEN __attribute__((visibility("hidden")))
#endif
#endif

@ -14,6 +14,8 @@
#include "paddle/fluid/framework/tensor_util.h"
#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_type.h"

@ -18,6 +18,7 @@
#include <limits>
#include <map>
#include <random>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/lod_tensor.h"
@ -139,6 +140,8 @@ class Autograd {
}
}
}
ready_op->InvokeBackwardHooks();
}
}
@ -156,8 +159,10 @@ class Autograd {
for (auto it : candidate->pre_ops_) {
for (OpBase* pre_op : it.second) {
if (!pre_op) continue;
VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- "
<< it.first << " <---- " << pre_op->op_desc_->Type();
VLOG(5) << "op dep " << candidate->op_desc_->Type() << " trace id "
<< candidate->trace_id_ << " <---- " << it.first << " <---- "
<< pre_op->op_desc_->Type() << " trace id "
<< pre_op->trace_id_;
if (visited.find(pre_op) == visited.end()) {
visited.insert(pre_op);
queue.push_back(pre_op);
@ -211,6 +216,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
return {};
}
VLOG(3) << "apply op grad: " << op_desc_->Type();
std::vector<framework::VariableValueMap> grad_outputs;
if (backward_id_ > 0) {
VLOG(3) << "py_layer_grad";
@ -272,6 +278,22 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
return input_vars_;
}
void OpBase::InvokeBackwardHooks() {
VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size();
// call backward hooks
for (py::object& callable : backward_hooks_) {
callable(this);
}
}
void OpBase::RegisterBackwardHooks(const py::object& callable) {
VLOG(3) << "Register backward hooks " << trace_id_;
// TODO(minqiyang): check the callable format
backward_hooks_.push_back(callable);
}
void VarBase::RunBackward() {
if (!pre_op_) return;

@ -123,22 +123,32 @@ class VarBase {
private:
VarBase(framework::Variable* var, VarBase* grad, bool stop_gradient)
: var_desc_(nullptr),
: name_(),
var_desc_(nullptr),
var_(var),
grads_(grad),
block_(nullptr),
persistable_(false),
stop_gradient_(stop_gradient),
pre_op_(nullptr),
pre_op_out_name_(),
pre_op_out_idx_(-1) {}
public:
virtual ~VarBase() {
// TODO(minqiyang): remove var desc from block desc
if (var_) {
delete var_;
var_ = nullptr;
}
if (grads_) {
delete grads_;
grads_ = nullptr;
}
pre_op_ = nullptr;
pre_op_out_idx_ = -1;
}
inline OpBase* PreOp() const { return pre_op_; }
@ -151,6 +161,14 @@ class VarBase {
void RunBackward();
inline void ResetPreOp(OpBase* op) {
if (op == pre_op_) {
// clear pre_op info when op equals to var's pre_op
pre_op_ = nullptr;
pre_op_out_idx_ = -1;
}
}
void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
int pre_op_out_idx, bool pre_op_stop_gradient) {
pre_op_ = pre_op;
@ -184,11 +202,15 @@ class VarBase {
return string::Sprintf("%s@IGrad", var_desc_->Name());
}
std::string name_;
framework::VarDesc* var_desc_;
framework::Variable* var_;
VarBase* grads_;
framework::BlockDesc* block_;
bool persistable_;
private:
bool stop_gradient_;
OpBase* pre_op_;
@ -199,15 +221,27 @@ class VarBase {
/* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its
* gradient. This object should be managed totally by Python intepreter.
*/
class OpBase {
class PYBIND11_HIDDEN OpBase {
public:
OpBase()
: op_desc_(nullptr),
forward_id_(-1),
backward_id_(-1),
place_(platform::CPUPlace()) {}
trace_id_(-1),
place_(platform::CPUPlace()),
backward_hooks_() {}
virtual ~OpBase() {
// TODO(minqiyang): remove op_desc from block_desc in tracer
//
// reset all output vars' pre op
for (auto iter : output_vars_) {
for (VarBase* var : iter.second) {
var->ResetPreOp(this);
}
}
// release resource
for (framework::OpDesc* desc : grad_op_descs_) {
delete desc;
}
@ -215,6 +249,10 @@ class OpBase {
std::map<std::string, std::vector<VarBase*>> ApplyGrad();
void RegisterBackwardHooks(const py::object& callable);
void InvokeBackwardHooks();
// One of `op_desc_` or `forward_id_` is set, not both.
// For pure python PyLayer, use `forward_id_`, otherwise, use op_desc_.
framework::OpDesc* op_desc_;
@ -225,6 +263,7 @@ class OpBase {
// Note: each fwd op corresponds to a vector of bwd ops.
std::vector<framework::OpDesc*> grad_op_descs_;
int backward_id_;
int trace_id_;
platform::Place place_;
@ -239,6 +278,8 @@ class OpBase {
std::vector<framework::VariableValueMap> grad_output_vars_;
framework::BlockDesc* block_;
std::vector<py::object> backward_hooks_;
};
class Layer {

@ -14,15 +14,32 @@
#include "paddle/fluid/imperative/tracer.h"
#include <memory>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
DEFINE_string(
tracer_profile_fname, "",
"Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
namespace paddle {
namespace imperative {
static std::once_flag gTracerProfileOnce;
#ifdef WITH_GPERFTOOLS
static bool gTracerProfilerStarted = false;
#endif
void CreateGradOp(const framework::OpDesc& op_desc,
const std::unordered_set<std::string>& no_grad_set,
const std::vector<framework::BlockDesc*>& grad_sub_block,
@ -68,15 +85,36 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) {
return result;
}
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
if (!FLAGS_tracer_profile_fname.empty()) {
std::call_once(gTracerProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
gTracerProfilerStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
});
}
}
std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
const VarBasePtrMap& outputs,
framework::BlockDesc* block,
const platform::Place expected_place,
const bool stop_gradient) {
#ifdef WITH_GPERFTOOLS
if (gTracerProfilerStarted) {
ProfilerFlush();
}
#endif
std::map<std::string, VarBase*> vars;
framework::OpDesc* op_desc = op->op_desc_;
VLOG(3) << "tracer tracing " << op_desc->Type();
VLOG(3) << "tracer tracing " << op_desc->Type() << " trace id "
<< op->trace_id_;
op_desc->InferShape(*block);
op_desc->InferVarType(block);
@ -99,11 +137,13 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
if (inp->PreOp() && !inp->IsStopGradient()) {
op->pre_ops_[it.first].push_back(inp->PreOp());
op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
VLOG(3) << "add pre op " << inp->PreOp()->op_desc_->Type();
} else {
op->pre_ops_[it.first].push_back(nullptr);
}
VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
<< inp->var_->IsInitialized();
<< inp->var_->IsInitialized() << " stop_gradient "
<< inp->IsStopGradient();
}
}
@ -155,6 +195,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
op->grad_input_vars_.resize(op->grad_op_descs_.size());
op->grad_output_vars_.resize(op->grad_op_descs_.size());
for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) {
framework::OpDesc* grad_op_desc = op->grad_op_descs_[i];
for (auto it : grad_op_desc->Inputs()) {
@ -167,7 +208,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
PADDLE_ENFORCE(fwd_var_it != vars.end());
// Forward inputs or outputs.
grad_in_vars.push_back(fwd_var_it->second->var_);
vars_saved_for_backward.insert(it.first);
} else {
VarBase* var = vars[var_it->second];
if (!var->grads_->var_->IsInitialized()) {
@ -177,6 +217,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
// Douts.
grad_in_vars.push_back(var->grads_->var_);
}
vars_saved_for_backward.insert(it.first);
}
}

@ -40,7 +40,7 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs);
class Tracer {
public:
explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
explicit Tracer(framework::BlockDesc* root_block);
virtual ~Tracer() {}

@ -16,6 +16,7 @@ add_subdirectory(utils)
if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()
# add_subdirectory(anakin)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)

@ -0,0 +1,4 @@
cc_library(anakin_engine SRCS engine.cc)
target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
add_subdirectory(convert)

@ -0,0 +1,2 @@
cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)

@ -0,0 +1,73 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
void FcOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto *y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(y_v);
auto *y_t = y_v->GetMutable<framework::LoDTensor>();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
auto weight_shape = framework::vectorize2int(y_t->dims());
engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
engine_->AddOpAttr(op_name, "bias_term", false);
engine_->AddOpAttr(op_name, "axis", 1);
int out_dim = weight_shape[1];
engine_->AddOpAttr(op_name, "out_dim", out_dim);
weight_shape.push_back(1);
weight_shape.push_back(1);
Shape anakin_shape(weight_shape);
framework::LoDTensor weight_tensor;
weight_tensor.Resize(y_t->dims());
TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr(op_name, "weight_1", *weight1);
}
} // namespace anakin
} // namespace inference
} // namespace paddle

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save