
upload-readme
sneaxiy 7 years ago
parent d0b2453ecd
commit 612e1a3155

@@ -23,8 +23,6 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/framework/details/reference_count_op_handle.h"
-
 namespace paddle {
 namespace framework {
 namespace details {

@@ -89,11 +89,6 @@ class OpHandleBase {
   ir::Node *Node() { return node_; }
 
-  const std::map<platform::Place, platform::DeviceContext *>
-      &GetDeviceContexts() const {
-    return dev_ctxes_;
-  }
-
  protected:
   void RunAndRecordEvent(const std::function<void()> &callback);

@@ -69,15 +69,15 @@ class ReferenceCountOpHandle : public OpHandleBase {
   std::string Name() const override { return "reference_count"; }
 
-  // protected:
+ protected:
   void RunImpl() override {
-    auto *exec_scope_ = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
+    auto *exec_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
     std::vector<LoDTensor *> tensors;
     for (auto &name : var_names_) {
       auto it = ref_cnts_->find(name);
       if (it == ref_cnts_->end()) continue;
-      auto *var = exec_scope_->FindVar(name);
+      auto *var = exec_scope->FindVar(name);
       if (var == nullptr || !var->IsType<LoDTensor>()) continue;
 
       if (it->second.fetch_sub(1) <= 1) {
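
Note on the hunk above: the `fetch_sub(1) <= 1` test relies on atomic fetch_sub returning the value held *before* the decrement, so the branch fires exactly once, for whichever op releases the last reference. A minimal standalone sketch of the same idiom (not Paddle code; assumes the counters behave like std::atomic<int>):

    #include <atomic>
    #include <cstdio>

    int main() {
      std::atomic<int> ref_cnt{2};  // two pending uses of some tensor
      for (int i = 0; i < 2; ++i) {
        // fetch_sub returns the PREVIOUS value, so "<= 1" is true only
        // for the caller that drops the last reference.
        if (ref_cnt.fetch_sub(1) <= 1) {
          std::printf("last reference dropped; safe to clear\n");
        }
      }
      return 0;
    }
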
@@ -91,8 +91,8 @@ class ReferenceCountOpHandle : public OpHandleBase {
   }
 
  private:
-  void ClearTensors(const std::vector<LoDTensor *> &tensors) const {
-    auto *gc = dynamic_cast<const StreamGarbageCollector<Tensor> *>(gc_);
+  void ClearTensors(const std::vector<LoDTensor *> &tensors) {
+    auto *gc = dynamic_cast<StreamGarbageCollector<Tensor> *>(gc_);
     if (gc != nullptr) {
       auto compute_stream = dev_ctx_->stream();
       auto callback_stream = gc->stream();
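
Note on the hunk above: dropping const from ClearTensors and from the dynamic_cast target lets the handle call mutating members of the collector; the stream-aware-vs-fallback choice itself is ordinary dynamic_cast dispatch. A self-contained sketch with stand-in class names (Collector and StreamCollector are illustrative, not Paddle's API):

    #include <iostream>

    struct Collector {                    // stands in for GarbageCollector<Tensor>
      virtual ~Collector() = default;
    };
    struct StreamCollector : Collector {  // stands in for StreamGarbageCollector<Tensor>
      int stream() { return 42; }         // stands in for a CUDA stream handle
    };

    void Clear(Collector *gc) {
      // Non-const pointer, so the stream-aware branch may mutate the collector.
      if (auto *sc = dynamic_cast<StreamCollector *>(gc)) {
        std::cout << "stream-aware path, stream=" << sc->stream() << "\n";
      } else {
        std::cout << "synchronous fallback path\n";
      }
    }

    int main() {
      StreamCollector sc;
      Collector c;
      Clear(&sc);  // stream-aware path
      Clear(&c);   // fallback path
      return 0;
    }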

@@ -128,12 +128,10 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
   std::vector<std::unique_ptr<OpHandleBase>> new_all_ops;
   new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size());
   for (auto &op : all_ops) {
-    auto it = compute_ref_cnt_map.find(op.get());
+    new_all_ops.emplace_back(std::move(op));
+    auto it = compute_ref_cnt_map.find(new_all_ops.back().get());
     if (it != compute_ref_cnt_map.end()) {
-      new_all_ops.emplace_back(std::move(op));
-      new_all_ops.emplace_back(std::unique_ptr<OpHandleBase>(it->second));
-    } else {
-      new_all_ops.emplace_back(std::move(op));
+      new_all_ops.emplace_back(it->second);
     }
   }
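
Note on the hunk above: the rewritten loop moves op into new_all_ops first and keys the map lookup off new_all_ops.back().get(), which also collapses the duplicated emplace_back branches. A moved-from unique_ptr is empty, so reading through the old handle after the move would yield nullptr, while the pointee itself is untouched. An illustrative check:

    #include <cassert>
    #include <memory>
    #include <vector>

    int main() {
      std::vector<std::unique_ptr<int>> v;
      auto p = std::make_unique<int>(7);
      int *raw = p.get();
      v.emplace_back(std::move(p));
      assert(p == nullptr);           // moved-from unique_ptr is empty
      assert(v.back().get() == raw);  // the pointee is unchanged
      return 0;
    }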

@@ -37,9 +37,11 @@ int kProgramId = -1;
 ExecutorPrepareContext::ExecutorPrepareContext(
     const framework::ProgramDesc& prog, size_t block_id)
-    : prog_(prog),
-      block_id_(block_id),
-      ref_cnts_(GetNonPersistableReferenceCount<int>(prog, block_id)) {}
+    : prog_(prog), block_id_(block_id) {
+  if (GetEagerDeletionThreshold() >= 0) {
+    ref_cnts_ = GetNonPersistableReferenceCount<int>(prog_, block_id_);
+  }
+}
 
 ExecutorPrepareContext::~ExecutorPrepareContext() {
   VLOG(5) << "destroy ExecutorPrepareContext";
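
Note on the hunk above: moving ref_cnts_ out of the member-initializer list makes its construction conditional, so programs that run with eager deletion disabled (threshold < 0) skip the reference-count scan entirely. A simplified sketch of the gating; Threshold and CountReferences are hypothetical stand-ins, not Paddle functions:

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    // Stand-in for GetEagerDeletionThreshold(); negative means disabled.
    std::int64_t Threshold() { return -1; }

    // Stand-in for the per-block reference-count scan.
    std::unordered_map<std::string, int> CountReferences() {
      return {{"x", 2}, {"y", 1}};
    }

    struct PrepareContext {
      PrepareContext() {
        if (Threshold() >= 0) {
          ref_cnts_ = CountReferences();  // skipped when the feature is off
        }
      }
      std::unordered_map<std::string, int> ref_cnts_;
    };
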
@@ -331,8 +333,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
     CreateVariables(ctx->prog_, local_scope, ctx->block_id_);
   }
 
-  std::shared_ptr<std::vector<framework::LoDTensor*>> erase_tensors(
-      new std::vector<framework::LoDTensor*>());
-
   int64_t max_memory_size = GetEagerDeletionThreshold();
   std::unique_ptr<GarbageCollector<Tensor>> gc;
@@ -353,7 +353,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
   for (auto& op : ctx->ops_) {
     op->Run(*local_scope, place_);
 
-#ifdef PADDLE_WITH_CUDA
     if (gc != nullptr) {
       std::vector<std::string> erase_vars;
       for (auto& input : op->Inputs()) {
@@ -395,7 +394,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
         if (!erase_tensors.empty()) gc->Add(erase_tensors);
       }
     }
-#endif
 
     if (FLAGS_benchmark) {
       VLOG(2) << "Memory used after operator " + op->Type() + " running: "
@@ -403,10 +401,11 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
     }
   }
 
-  if (gc != nullptr)
+  if (gc != nullptr) {
     gc->Wait();
-  else
+  } else {
     platform::DeviceContextPool::Instance().Get(place_)->Wait();
+  }
 
   if (local_scope != scope) {
     scope->DeleteScope(local_scope);

@@ -28,8 +28,6 @@ namespace paddle {
 namespace framework {
 
 extern void InitializeVariable(Variable* var, proto::VarType::Type var_type);
-int64_t GetEagerDeletionThreshold();
-
 template <typename T>
 std::unordered_map<std::string, T> GetNonPersistableReferenceCount(
     const ProgramDesc& prog, size_t block_id) {

@@ -22,7 +22,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 
 #ifdef PADDLE_WITH_CUDA
-#include "paddle/fluid/framework/details/reference_count_pass.h"
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif

@@ -29,6 +29,10 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
 
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/framework/details/reference_count_pass.h"
+#endif
+
 namespace paddle {
 namespace framework {

@@ -32,7 +32,7 @@ DEFINE_bool(
     "slow down the destruction of variables.(around 1% performance harm)");
 
 DEFINE_double(
-    eager_delete_tensor_GB, -1.0,
+    eager_delete_tensor_gb, -1.0,
     "Memory size threshold (GB) when the garbage collector clear tensors."
     "Disabled when this value is less than 0");
@@ -40,9 +40,9 @@ namespace paddle {
 namespace framework {
 
 int64_t GetEagerDeletionThreshold() {
-  return FLAGS_eager_delete_tensor_GB < 0
+  return FLAGS_eager_delete_tensor_gb < 0
              ? -1
-             : static_cast<int64_t>(FLAGS_eager_delete_tensor_GB *
+             : static_cast<int64_t>(FLAGS_eager_delete_tensor_gb *
                                     (static_cast<int64_t>(1) << 30));
 }
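
Note on the hunk above: the conversion multiplies a (possibly fractional) GB value by 2^30 and keeps -1 as the "disabled" sentinel; doing the multiply in double before the cast preserves fractions like 0.5 GB. The same arithmetic in isolation:

    #include <cstdint>
    #include <cstdio>

    std::int64_t ThresholdBytes(double gb) {
      // Negative input means "eager deletion off"; otherwise GB -> bytes.
      return gb < 0 ? -1
                    : static_cast<std::int64_t>(gb * (static_cast<std::int64_t>(1) << 30));
    }

    int main() {
      std::printf("%lld\n", (long long)ThresholdBytes(0.5));   // 536870912
      std::printf("%lld\n", (long long)ThresholdBytes(-1.0));  // -1
      return 0;
    }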

@@ -36,8 +36,6 @@ limitations under the License. */
 #endif
 #include "unsupported/Eigen/CXX11/Tensor"
 
-DECLARE_bool(clear_gpu_memory_when_unused);
-
 namespace paddle {
 namespace platform {

@@ -122,7 +122,7 @@ def __bootstrap__():
         'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
         'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
         'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
-        "dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_GB'
+        "dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_gb'
     ]
     if core.is_compiled_with_dist():
         read_env_flags.append('rpc_deadline')
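
Note on the hunk above: read_env_flags must list the exact gflags name, which is why the C++ rename to eager_delete_tensor_gb has to be mirrored here; __bootstrap__ forwards matching environment variables into the flag parser. A hedged gflags-style sketch of why the spellings must agree end to end (standalone example, not Paddle's bootstrap code):

    #include <gflags/gflags.h>

    // Registers the flag under the lowercase name; a launcher passing
    // --eager_delete_tensor_GB would no longer match after the rename.
    DEFINE_double(eager_delete_tensor_gb, -1.0, "GC threshold in GB");

    int main(int argc, char* argv[]) {
      gflags::ParseCommandLineFlags(&argc, &argv, true);
      return FLAGS_eager_delete_tensor_gb < 0 ? 0 : 1;
    }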
