Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into imperative_shared_ptr
test=develop
revert-15207-remove_op_handle_lock_and_fix_var
commit d0b640dca1
@@ -0,0 +1,28 @@
# - Find JeMalloc library
# Find the native JeMalloc includes and library
#
# JEMALLOC_INCLUDE_DIR - where to find jemalloc.h, etc.
# JEMALLOC_LIBRARIES - List of libraries when using jemalloc.
# JEMALLOC_FOUND - True if jemalloc found.

find_path(JEMALLOC_INCLUDE_DIR
  NAMES jemalloc/jemalloc.h
  HINTS ${JEMALLOC_ROOT_DIR}/include)

find_library(JEMALLOC_LIBRARIES
  NAMES jemalloc
  HINTS ${JEMALLOC_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(jemalloc DEFAULT_MSG JEMALLOC_LIBRARIES JEMALLOC_INCLUDE_DIR)

mark_as_advanced(
  JEMALLOC_LIBRARIES
  JEMALLOC_INCLUDE_DIR)

if (JEMALLOC_FOUND)
  add_library(jemalloc::jemalloc UNKNOWN IMPORTED)
  set_target_properties(jemalloc::jemalloc PROPERTIES
    IMPORTED_LOCATION ${JEMALLOC_LIBRARIES}
    INTERFACE_INCLUDE_DIRECTORIES "${JEMALLOC_INCLUDE_DIR}")
endif()
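For reference, a minimal sketch of how a CMakeLists.txt could consume this find module. The project name, source file, and cmake/ directory below are assumptions made for the example; only the JEMALLOC_* variables and the jemalloc::jemalloc imported target come from the module above.

# Hypothetical CMakeLists.txt fragment using the find module above;
# it assumes the module is saved as cmake/FindJeMalloc.cmake.
cmake_minimum_required(VERSION 3.5)
project(jemalloc_demo CXX)

# Make the find module visible to find_package().
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")

# Optionally point at a non-standard install prefix before searching.
# set(JEMALLOC_ROOT_DIR "/opt/jemalloc")

find_package(JeMalloc)

add_executable(demo main.cc)
if(JEMALLOC_FOUND)
  # The imported target carries both the library path and the include directory.
  target_link_libraries(demo PRIVATE jemalloc::jemalloc)
endif()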
File diff suppressed because it is too large
@@ -0,0 +1,99 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h"

namespace paddle {
namespace framework {
namespace details {

ParallelSSAGraphExecutor::ParallelSSAGraphExecutor(
    const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
    const std::vector<platform::Place> &places,
    std::vector<std::unique_ptr<ir::Graph>> &&graphs)
    : strategy_(std::move(strategy)),
      local_scopes_(std::move(local_scopes)),
      pool_(places.size() >= 2 ? new ::ThreadPool(places.size()) : nullptr),
      places_(std::move(places)),
      graphs_(std::move(graphs)) {
  PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size());

  // set the correct size of thread pool to each device.
  strategy_.num_threads_ = strategy_.num_threads_ < places_.size()
                               ? 1UL
                               : strategy_.num_threads_ / places_.size();
  VLOG(1) << "set num_threads: " << strategy_.num_threads_
          << " to run the operators of the graph on each device.";
  for (size_t i = 0; i < places.size(); ++i) {
    executors_.emplace_back(new details::ThreadedSSAGraphExecutor(
        strategy_, {local_scopes_[i]}, {places_[i]}, std::move(graphs_[i])));
  }
}

FeedFetchList ParallelSSAGraphExecutor::Run(
    const std::vector<std::string> &fetch_tensors) {
  std::vector<std::future<FeedFetchList>> run_futures;

  std::vector<FeedFetchList> fetch_data;
  FeedFetchList ret;

  fetch_data.reserve(places_.size());
  ret.reserve(fetch_tensors.size());
  exception_holder_.Clear();

  for (size_t i = 0; i < places_.size(); ++i) {
    auto call = [this, i, &fetch_tensors]() -> FeedFetchList {
      try {
        return executors_[i]->Run(fetch_tensors);
      } catch (...) {
        exception_holder_.Catch(std::current_exception());
      }
      return FeedFetchList();
    };

    if (pool_) {
      run_futures.emplace_back(pool_->enqueue(std::move(call)));
    } else {
      fetch_data.emplace_back(std::move(call()));
    }
  }

  if (pool_) {
    for (auto &f : run_futures) {
      if (exception_holder_.IsCaught()) {
        f.wait();
      } else {
        fetch_data.emplace_back(std::move(f.get()));
      }
    }
  }
  if (exception_holder_.IsCaught()) {
    exception_holder_.ReThrow();
  }

  for (size_t fetch_idx = 0; fetch_idx < fetch_tensors.size(); ++fetch_idx) {
    std::vector<const LoDTensor *> lodtensor_ptrs;
    lodtensor_ptrs.reserve(local_scopes_.size());
    for (size_t scope_idx = 0; scope_idx < local_scopes_.size(); ++scope_idx) {
      lodtensor_ptrs.push_back(&fetch_data.at(scope_idx).at(fetch_idx));
    }
    ret.emplace_back();
    ret.back().MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
  }
  return ret;
}

}  // namespace details
}  // namespace framework
}  // namespace paddle
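Run() above fans the same fetch request out to one ThreadedSSAGraphExecutor per place, gathers the per-device FeedFetchList results (through the thread pool when there are two or more places), and merges them tensor by tensor onto the CPU place. The following is a minimal, self-contained sketch of that scatter-gather shape only: std::async stands in for ::ThreadPool, std::vector<float> for LoDTensor, and plain concatenation for MergeLoDTensor, so none of these stand-ins are the actual Paddle API.

// Minimal sketch of the scatter-gather pattern used by
// ParallelSSAGraphExecutor::Run(); types and names are illustrative.
#include <future>
#include <iostream>
#include <vector>

using FetchList = std::vector<std::vector<float>>;  // one "tensor" per fetch name

int main() {
  const size_t num_devices = 2;
  const size_t num_fetches = 3;

  // Scatter: launch one worker per device, each producing its local fetches.
  std::vector<std::future<FetchList>> futures;
  for (size_t dev = 0; dev < num_devices; ++dev) {
    futures.emplace_back(std::async(std::launch::async, [dev, num_fetches] {
      FetchList local(num_fetches);
      for (size_t f = 0; f < num_fetches; ++f) {
        local[f] = {static_cast<float>(dev), static_cast<float>(f)};
      }
      return local;
    }));
  }

  // Gather the per-device results.
  std::vector<FetchList> fetch_data;
  for (auto &fut : futures) fetch_data.emplace_back(fut.get());

  // Merge: for each fetched variable, concatenate the per-device tensors.
  FetchList merged(num_fetches);
  for (size_t f = 0; f < num_fetches; ++f) {
    for (size_t dev = 0; dev < num_devices; ++dev) {
      merged[f].insert(merged[f].end(), fetch_data[dev][f].begin(),
                       fetch_data[dev][f].end());
    }
    std::cout << "fetch " << f << " merged size: " << merged[f].size() << "\n";
  }
  return 0;
}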
@@ -0,0 +1,51 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>

#include "ThreadPool.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"

namespace paddle {
namespace framework {
namespace details {

class ParallelSSAGraphExecutor : public SSAGraphExecutor {
 public:
  ParallelSSAGraphExecutor(const ExecutionStrategy &strategy,
                           const std::vector<Scope *> &local_scopes,
                           const std::vector<platform::Place> &places,
                           std::vector<std::unique_ptr<ir::Graph>> &&graphs);
  ~ParallelSSAGraphExecutor() final = default;
  const ir::Graph &Graph() const override { return *graphs_[0]; }

  FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;

 private:
  ExecutionStrategy strategy_;
  std::vector<Scope *> local_scopes_;
  std::unique_ptr<::ThreadPool> pool_{nullptr};
  std::vector<platform::Place> places_;
  std::vector<std::unique_ptr<ir::Graph>> graphs_;

  std::vector<std::unique_ptr<details::ThreadedSSAGraphExecutor>> executors_;
  ExceptionHolder exception_holder_;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
File diff suppressed because it is too large
@@ -0,0 +1,130 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
#define PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_

#include <string>
#include <vector>

#include <boost/algorithm/string/predicate.hpp>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

class Node;

/*
 * Remove the sum op of all gradients of the backward op, and remove the
 * dependencies of the optimizer related to the same backward op.
 *
 * Before this pass:
 *
 *   forward_op1    forward_op2
 *        |              |
 *    grad_op1       grad_op2
 *         \            /
 *          \          /
 *            sum_op
 *              |
 *            sgd_op
 *
 * After this pass:
 *
 *   forward_op1    forward_op2
 *        |              |
 *    grad_op1       grad_op2
 *        |              |
 *     sgd_op1        sgd_op2
 *
 * sgd_op1 and sgd_op2 will update the same weight, which holds the same
 * memory, so we can benefit from the acceleration.
 */
class LockFreeOptimizePass : public Pass {
 public:
  virtual ~LockFreeOptimizePass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;

 private:
  // Create a new sgd node via current optimizer node
  ir::Node* CreateNewSGDNode(ir::Graph* graph, ir::Node* forward_node,
                             ir::Node* backward_node, ir::Node* grad_sum_node,
                             ir::Node* optimize_node) const;

  // Replace the input weight's optimizers
  void ReplaceUpstreamNode(ir::Node* upstream_node,
                           ir::Node* old_optimizer_node,
                           ir::Node* new_optimizer_node) const;

  // Replace the output weight's optimizers
  void ReplaceAllDownstreamNode(ir::Node* old_optimizer_node,
                                ir::Node* new_optimizer_node) const;

  // Find all weight variables in graph
  bool FindAllWeightVars(ir::Graph* graph) const;

  // Find the forward_op node via the backward_op node
  ir::Node* FindForwardOpViaBackwardOp(ir::Graph* graph,
                                       ir::Node* backward_node) const;

  std::vector<ir::Node*> FindConnectedNode(ir::Node* upstream_node,
                                           ir::Node* downstream_node) const;

  inline bool IsOpNamed(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kOperation && node->Name() == name;
  }

  inline bool IsVarNamed(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable && node->Name() == name;
  }

  inline bool IsVarNameEndsWith(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable &&
           boost::algorithm::ends_with(node->Name(), name);
  }

  inline bool IsVarNameContains(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable &&
           node->Name().find(name) != std::string::npos;
  }

  inline bool IsControlDepFrom(ir::Node* ctrl_dep_node, ir::Node* node) const {
    PADDLE_ENFORCE(ctrl_dep_node);
    PADDLE_ENFORCE(node);

    return IsControlDepVar(*ctrl_dep_node) &&
           ctrl_dep_node->inputs.size() >= 1u &&
           ctrl_dep_node->inputs[0] == node;
  }
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle

#endif  // PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
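The class comment describes the rewrite only as a before/after picture. Below is a toy, self-contained illustration of that same reshaping on a hand-rolled node list: the shared sum_op/sgd_op pair is dropped and one sgd node is hung directly off each gradient. The Node struct and the string-matching rewrite are invented for the example; they are not Paddle's ir::Graph/ir::Node API or the pass's real matching logic.

// Toy illustration of the LockFreeOptimizePass rewrite (C++14).
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Node {
  std::string name;
  std::vector<Node*> inputs;  // nodes this op consumes
};

int main() {
  // Before: grad_op1, grad_op2 -> sum_op -> sgd_op
  std::vector<std::unique_ptr<Node>> graph;
  auto add = [&graph](const std::string& name, std::vector<Node*> inputs) {
    graph.push_back(std::make_unique<Node>(Node{name, std::move(inputs)}));
    return graph.back().get();
  };
  Node* grad1 = add("grad_op1", {});
  Node* grad2 = add("grad_op2", {});
  Node* sum = add("sum_op", {grad1, grad2});
  add("sgd_op", {sum});

  // Rewrite: drop sum_op / sgd_op and give each gradient its own sgd node.
  std::vector<std::unique_ptr<Node>> rewritten;
  int idx = 1;
  for (const auto& n : graph) {
    if (n->name == "sum_op" || n->name == "sgd_op") continue;  // removed
    rewritten.push_back(std::make_unique<Node>(*n));
    if (n->name.rfind("grad_op", 0) == 0) {
      rewritten.push_back(std::make_unique<Node>(
          Node{"sgd_op" + std::to_string(idx++), {rewritten.back().get()}}));
    }
  }

  // Prints: grad_op1, sgd_op1 <- grad_op1, grad_op2, sgd_op2 <- grad_op2
  for (const auto& n : rewritten) {
    std::cout << n->name;
    if (!n->inputs.empty()) std::cout << " <- " << n->inputs[0]->name;
    std::cout << "\n";
  }
  return 0;
}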
Some files were not shown because too many files have changed in this diff.