Remove legacy C++ memory optimization codes (#18834)

* remove legacy memory optimization codes, test=develop * follow huihuang's comments,test=develop * follow luotao's comments, test=develop
6 years ago · 8008ab4e6b
parent 52c1431eee
commit 8008ab4e6b
22 changed files with 66 additions and 2684 deletions
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@ -133,7 +133,7 @@ cc_test(version_test SRCS version_test.cc DEPS version)

 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)

-cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc memory_optimize_helper)
+cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)

 nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)

@ -204,7 +204,6 @@ cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
 cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
        proto_desc)
-cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS inplace_op_pass op_registry proto_desc op_info memory_optimize_helper pass_builder)
 cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
 cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)

--- a/paddle/fluid/framework/details/CMakeLists.txt
+++ b/paddle/fluid/framework/details/CMakeLists.txt
@ -62,7 +62,7 @@ cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope d

 cc_library(eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper)

-set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass inplace_op_pass buffer_shared_inplace_op_pass buffer_shared_cross_op_memory_reuse_pass)
+set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass buffer_shared_inplace_op_pass buffer_shared_cross_op_memory_reuse_pass)
 cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS})

 cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
@ -92,6 +92,6 @@ cc_library(build_strategy SRCS build_strategy.cc DEPS
        multi_devices_graph_print_pass multi_devices_graph_check_pass
        fuse_elewise_add_act_pass multi_batch_merge_pass 
        fuse_relu_depthwise_conv_pass
-        memory_optimize_pass lock_free_optimize_pass
+        lock_free_optimize_pass
        coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass
-        fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass record_skip_memory_opt_vars_pass)
+        fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass)
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@ -24,8 +24,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_printer.h"
 #include "paddle/fluid/framework/ir/graph_to_program_pass.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
-#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h"
-#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h"
 #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h"

 DECLARE_bool(use_mkldnn);
@ -51,17 +49,13 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
    ResolveOptionConfliction();

    AppendPrintGraphPass("graph_viz_pass", "_original_graph");
-    // Note(zcd): record_skip_memory_opt_vars_pass should
-    // be the first pass.
-    AppendPass("record_skip_memory_opt_vars_pass");
    AppendPassWithCheck(strategy_.enable_sequential_execution_,
                        "sequential_execution_pass");
    AppendPassWithCheck(strategy_.sync_batch_norm_, "sync_batch_norm_pass");

    AppendOpFusePasses();
    AppendPrintGraphPass("graph_viz_pass", "_fused_graph");
-    // TODO(dev-paddle): memory optimize pass should be placed last.
-    AppendMemoryOptimizePasses();
+
    AppendMultiDevPass();
    AppendMultiGraphOptPasses();

@ -147,23 +141,6 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
    }
  }

-  void AppendMemoryOptimizePasses() {  // Append Memory Optimize Pass
-    // TODO(zjl): refactor MemoryOptimizePass to fit
-    // new strategy, which does not need to set
-    // var.persistable = True
-    if (strategy_.use_legacy_memory_optimize_strategy_) {
-      AppendPassWithCheck(strategy_.enable_inplace_, "inplace_pass");
-    }
-    // NOTE(dzh): memory optimize should be a runtime pass.
-    // However, after multi_devices_pass, VarHandle, OpHandle is
-    // the de-fact IR, any reuse on Graph is meaningless.
-    // A side-effect of that, memory optimize cannot forsee the fetched vars
-    // , so fetchlist should be set persistable before call the Run interface.
-    if (strategy_.use_legacy_memory_optimize_strategy_) {
-      AppendPassWithCheck(strategy_.memory_optimize_, "memory_optimize_pass");
-    }
-  }
-
  void SetCollectiveContext() const {
    CollectiveContext *context = CollectiveContext::GetInstance();
    context->endpoints_ = strategy_.trainers_endpoints_;
@ -330,9 +307,6 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
                        "GPU, skipped.";
        continue;
      }
-    } else if (pass->Type() == "inplace_pass") {
-      pass->Erase(ir::kUseCuda);
-      pass->Set<bool>(ir::kUseCuda, new bool(use_cuda));
    } else if (pass->Type() == "mkldnn_placement_pass") {
      pass->Set("mkldnn_enabled_op_types",
                new std::unordered_set<std::string>(mkldnn_enabled_op_types_));
@ -365,12 +339,10 @@ USE_PASS(all_reduce_mode_multi_devices_pass);
 USE_PASS(dist_multi_devices_pass);
 USE_PASS(multi_devices_check_pass);
 USE_PASS(multi_devices_print_pass);
-USE_PASS(memory_optimize_pass);
 USE_PASS(sequential_execution_pass);
 USE_PASS(all_reduce_deps_pass);
 USE_PASS(backward_optimizer_op_deps_pass);
 USE_PASS(modify_op_lock_and_record_event_pass);
-USE_PASS(inplace_pass);
 USE_PASS(lock_free_optimize_pass);
 USE_PASS(coalesce_grad_tensor_pass);
 USE_PASS(graph_to_program_pass);
@ -379,7 +351,6 @@ USE_PASS(fuse_sgd_op_pass);
 USE_PASS(fuse_momentum_op_pass);
 USE_PASS(fuse_all_reduce_op_pass);
 USE_PASS(runtime_context_cache_pass);
-USE_PASS(record_skip_memory_opt_vars_pass);
 #ifdef PADDLE_WITH_MKLDNN
 USE_PASS(mkldnn_placement_pass);
 #endif
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@ -19,6 +19,7 @@
 #include <unordered_set>
 #include <utility>
 #include <vector>
+#include "boost/optional.hpp"
 #include "paddle/fluid/framework/ir/pass_builder.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
@ -108,14 +109,14 @@ struct BuildStrategy {
  // FLAGS_use_mkldnn=false
  std::unordered_set<std::string> mkldnn_enabled_op_types_;

-  bool memory_optimize_{false};
+  // By default, memory_optimize is enabled when gc is disabled and
+  // disabled when gc is enabled.
+  // Users can force it on or off by explicitly setting True/False.
+  boost::optional<bool> memory_optimize_{boost::none};

  // Turn on inplace by default.
  bool enable_inplace_{true};

-  // TODO(zjl): Remove this flag when MemoryOptimizePass is refactored
-  bool use_legacy_memory_optimize_strategy_{false};
-
  // FIXME(zcd): is_distribution_ is a temporary field, because in pserver mode,
  // num_trainers is 1, so the current fields of build_strategy doesn't tell if
  // it's distributed model.
--- a/paddle/fluid/framework/inplace_op_inference.h
+++ b/paddle/fluid/framework/inplace_op_inference.h
@ -13,13 +13,8 @@
 // limitations under the License.

 #pragma once
-#include <functional>
-#include <numeric>
 #include <string>
 #include <unordered_map>
-#include <unordered_set>
-#include "glog/logging.h"
-#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/type_defs.h"

--- a/paddle/fluid/framework/inplace_op_inference_test.cc
+++ b/paddle/fluid/framework/inplace_op_inference_test.cc
--- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
@ -4,20 +4,8 @@ cc_library(recurrent_op_eager_deletion_pass SRCS recurrent_op_eager_deletion_pas
 cc_library(reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle var_handle)
 cc_library(reference_count_pass SRCS reference_count_pass.cc DEPS computation_op_handle graph graph_helper pass op_graph_view reference_count_pass_helper)

-if(WITH_GPU)
-    cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info)
-else()
-    cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info)
-endif()
-
-cc_library(memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass)
-cc_library(inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info)
-
-cc_test(memory_optimize_helper_test SRCS memory_optimize_helper_test.cc memory_optimize_helper.cc DEPS framework_proto graph graph_helper op_registry)
-
 cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_handle
    eager_deletion_op_handle graph graph_helper pass while_op_eager_deletion_pass recurrent_op_eager_deletion_pass reference_count_pass_helper)
-cc_library(record_skip_memory_opt_vars_pass SRCS record_skip_memory_opt_vars_pass.cc DEPS graph graph_helper)

 cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handle reference_count_pass_helper share_tensor_buffer_op_handle multi_devices_helper graph pass) 

--- a/paddle/fluid/framework/ir/memory_optimize_pass/inplace_op_pass.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_op_pass.cc
--- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.cc
--- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h
@ -1,187 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include <iostream>
-#include <iterator>
-#include <list>
-#include <map>
-#include <set>
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/ir/graph.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-/// this attribute is used to avoid some core variables removed/reused
-/// in memory optimize related passes
-constexpr char kMemOptSkipVars[] = "@MEM_OPT_SKIP_VARS@";
-typedef std::unordered_set<std::string> MemOptSkipVars;
-
-std::vector<ir::Node*> SortOpLikeDescOrder(const ir::Graph& graph);
-
-// NOTE(dzh): A ordered set for node reuse in memory optimize.
-// the orderedset sort node in ascend order(by node bytes size).
-// in fluid, -1 means the batch_size, which is determined in runtime.
-// So the reuse happens between nodes who's batch_size both are -1
-// simultaneously or not.
-//
-// sort rule:
-// rule 0 : smaller node ranking in front.
-// rule 1 : batch_size equal -1 ranking in the front than the node not.
-//
-// For example,
-// node0[-1, 1] node1[-1, 1, 1], node2[1,1], node3[1,1024], ..
-
-class OrderedSet {
- public:
-  // nodes with same name exists in pool.
-  using NodeVector = std::vector<ir::Node*>;
-  using Iter = typename std::list<NodeVector>::iterator;
-  using ConstIter = typename std::list<NodeVector>::const_iterator;
-
-  void Insert(ir::Node* var);
-  void Erase(ir::Node* var);
-  void Erase(const std::string& var);
-  bool Has(ir::Node* var) const;
-  void Clear() {
-    mark_table_.clear();
-    nodes_.clear();
-  }
-  // find the bestfit shape node block with var.
-  ir::Node* FindBestFitNode(ir::Node* var) const;
-  ir::Node* FindNextBestFitNode(ir::Node* var, ir::Node* prev) const;
-  // map store non-const iterator, can not promise const
-  int GetNodeIndexInPool(ir::Node* var);
-  // pool all node to string
-  std::string ToString() const;
-
-  Iter begin() { return nodes_.begin(); }
-  Iter end() { return nodes_.end(); }
-  ConstIter begin() const { return nodes_.begin(); }
-  ConstIter end() const { return nodes_.end(); }
-
-  size_t size() const { return nodes_.size(); }
-
- private:
-  // for searching.
-  std::unordered_map<std::string, Iter> mark_table_;
-  // node pool
-  std::list<NodeVector> nodes_;
-};
-
-class ControlFlowGraph {
- public:
-  ControlFlowGraph() = default;
-  // IR Graph
-  explicit ControlFlowGraph(const ir::Graph& graph);
-
-  void LiveVariableAnalysis();
-
-  void RenameVarInCFGGraph(const std::string& old_node,
-                           const std::string& new_node, int begin_idx);
-
-  const std::set<std::string>& LiveIn(ir::Node* op) const;
-  const std::set<std::string>& LiveOut(ir::Node* op) const;
-  const std::set<std::string>& Use(ir::Node* op) const;
-  const std::set<std::string>& Unlived(ir::Node* op) const;
-  const std::vector<ir::Node*>& Ops() const;
-  std::vector<ir::Node*>& Ops();
-
-  // for ssa-graph nodes
-  ir::Node* GetNodeByName(const std::string& name, ir::Node* op) const;
-
- private:
-  void BuildCFGGraph();
-  void ConnectNodes();
-
-  using NodeListMap = std::unordered_map<ir::Node*, std::set<ir::Node*>>;
-  using VarSetMap = std::map<ir::Node*, std::set<std::string>>;
-  // successors ops use the output variables.
-  NodeListMap successors_;
-  // predecessors ops generated input variables.
-  NodeListMap predecessors_;
-  // variables lived before run current op.
-  VarSetMap live_in_;
-  // variables lived after run current op.
-  VarSetMap live_out_;
-  VarSetMap uses_;  // op inputs
-  VarSetMap defs_;  // op outputs
-  std::unordered_map<ir::Node*, std::set<std::string>> unlived_vars_;
-
-  std::vector<ir::Node*> ops_;  // op sequence by topology sort
-};
-
-// valid a tensor can be reuse or not
-bool NodeCanReused(ir::Node* node);
-
-// valid a tensor can be reuse or not.
-bool NodeCanReused(const VarDesc& node);
-
-// check op has subblock or not
-bool OpHasSubBlock(OpDesc* desc);
-
-// node memory size in bytes
-size_t NodeSize(ir::Node* n);
-
-// node memory size in bytes
-size_t NodeSize(const VarDesc&);
-
-std::string DebugString(ir::Node* var);
-
-VarDesc* GetVarDesc(ir::Node* n);
-
-static inline bool IsSameDesc(OpDesc* op1, OpDesc* op2) {
-  return op1->Type() == op2->Type() && op1->Inputs() == op2->Inputs() &&
-         op1->Outputs() == op2->Outputs();
-}
-
-template <typename Container, typename Callback>
-class FilterVariableImpl {
- public:
-  void operator()(const Container& nodes, Callback callback) {
-    for (auto* node : nodes) {
-      callback(node);
-    }
-  }
-};
-
-// filter var node for op->inputs/outputs
-template <typename Callback>
-class FilterVariableImpl<std::vector<ir::Node*>, Callback> {
- public:
-  void operator()(const std::vector<ir::Node*>& nodes, Callback callback) {
-    for (auto* var : nodes) {
-      if (var->IsVar() && !var->IsCtrlVar()) {
-        callback(var);
-      }
-    }
-  }
-};
-
-template <typename Container, typename Callback>
-void FilterVariables(const Container& nodes, Callback callback) {
-  FilterVariableImpl<Container, Callback>()(nodes, callback);
-}
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
--- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper_test.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper_test.cc
--- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.cc
@ -1,224 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.h"
-#include <algorithm>
-#include <atomic>
-#include <deque>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <memory>
-#include <queue>
-#include <sstream>
-#include <string>
-#include <type_traits>
-#include <unordered_set>
-#include <vector>
-#include "gflags/gflags.h"
-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-void MemoryOptimizePass::ApplyImpl(ir::Graph* graph) const {
-  CollectSkipVarsSet(graph);
-
-  cfg_.reset(new ControlFlowGraph(*graph));
-  cfg_->LiveVariableAnalysis();
-  InitSSAGraphNodes();
-
-  int reuse_id = 0;
-  for (size_t idx = 0; idx < cfg_->Ops().size(); ++idx) {
-    auto& op = cfg_->Ops()[idx];
-    auto* op_desc = op->Op();
-    // some op in graph has no op desc
-    if (op_desc == nullptr) continue;
-
-    for (auto& var : op->outputs) {
-      if (var->IsVar() && !var->IsCtrlVar() && skip_set_.count(var->Name())) {
-        VLOG(3) << "Skip set contains variable of " << var->Name()
-                << "disable reuse on it. skipped";
-        continue;
-      }
-      if (NodeCanReused(var) && cfg_->Use(op).count(var->Name()) == 0) {
-        ir::Node* cache = pool_.FindBestFitNode(var);
-        while (cache != nullptr && var->Name() == cache->Name()) {
-          VLOG(3) << "The same cache variable is cascade reused. "
-                  << cache->Name() << " is re-filled to the pool after "
-                  << "the reused op is finished. Current op can not "
-                  << "replace it again. Skip this candidate.";
-          cache = pool_.FindNextBestFitNode(var, cache);
-        }
-
-        if (cache != nullptr) {
-          int node_idx_in_pool = pool_.GetNodeIndexInPool(cache);
-          VLOG(3) << string::Sprintf(
-              "!!! %s,  %s => %s, cache idx %d, pool size %d",
-              std::to_string(reuse_id++), DebugString(var), DebugString(cache),
-              node_idx_in_pool, static_cast<int>(pool_.size()));
-          // NOTE(dzhwinter): update the ProgramDesc/IR Graph
-          // and the CFG Graph on the fly.
-          //
-          // IR Graph define the dependence relationship between nodes.
-          //
-          // ProgramDesc defines the input/output vars. Its used in
-          // CreateOp, CreateVar when running happens.
-          //
-          // CFG Graph store the liveness information, when reuse happens
-          // we also need to update the variable liveness.
-          const std::string var_name = var->Name();
-          const std::string cache_name = cache->Name();
-
-          cfg_->RenameVarInCFGGraph(var_name, cache_name, idx);
-          RenameVarInGraphDesc(var_name, cache_name, idx);
-          RenameVarInGraphNode(var_name, cache_name, idx, graph);
-          pool_.Erase(cache_name);
-        }
-      }
-    }
-    // fill the pool
-    for (auto& var : cfg_->Unlived(op)) {
-      ir::Node* var_node = cfg_->GetNodeByName(var, op);
-      if (var_node == nullptr || var_node->IsCtrlVar()) continue;
-      if (NodeCanReused(var_node) && !pool_.Has(var_node)) {
-        pool_.Insert(var_node);
-      }
-    }
-  }
-  graph->ResolveHazard(var_nodes_);
-}
-
-void MemoryOptimizePass::CollectSkipVarsSet(ir::Graph* graph) const {
-  // fill skip_set_
-  PADDLE_ENFORCE(graph->Has(kMemOptSkipVars));
-  auto& mem_opt_whitelist = graph->Get<MemOptSkipVars>(kMemOptSkipVars);
-  for (const auto& var : mem_opt_whitelist) {
-    skip_set_.emplace(var);
-  }
-}
-
-void MemoryOptimizePass::RenameVarInGraphDesc(const std::string& var,
-                                              const std::string& cache_var,
-                                              size_t idx) const {
-  for (size_t i = idx; i < cfg_->Ops().size(); ++i) {
-    auto* op = cfg_->Ops()[i];
-    PADDLE_ENFORCE(op->IsOp() && op->Op());
-    auto* op_desc = op->Op();
-    op_desc->RenameInput(var, cache_var);
-    op_desc->RenameOutput(var, cache_var);
-    if (op_desc->Block() != nullptr) {
-      op_desc->Block()->RemoveVar(var);
-    } else {
-      LOG(WARNING) << "op " << op->Name() << " not know its block."
-                   << "Is the op_desc created without block pointer? "
-                   << "Can not find " << var << " in Block(0)";
-    }
-    op_desc->Flush();
-  }
-}
-
-void MemoryOptimizePass::InitSSAGraphNodes() const {
-  std::unordered_map<std::string, std::unordered_set<ir::Node*>> all_vars;
-  if (var_nodes_.empty()) {
-    for (auto* op : cfg_->Ops()) {
-      for (auto* node : op->inputs) {
-        if (all_vars[node->Name()].count(node) == 0) {
-          all_vars[node->Name()].emplace(node);
-          var_nodes_[node->Name()].emplace_back(node);
-        }
-      }
-      for (auto* node : op->outputs) {
-        if (all_vars[node->Name()].count(node) == 0) {
-          all_vars[node->Name()].emplace(node);
-          var_nodes_[node->Name()].emplace_back(node);
-        }
-      }
-    }
-  }
-}
-
-void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
-                                              const std::string& cache_var,
-                                              size_t idx,
-                                              ir::Graph* graph) const {
-  // if replace happens, we need to create a newer version cache_var
-  // but use the same dims/data_type with var.
-  PADDLE_ENFORCE(var_nodes_[var].size() >= 1 &&
-                 var_nodes_[var].at(0)->Var() != nullptr);
-  std::unique_ptr<VarDesc> var_desc(new VarDesc(*var_nodes_[var].at(0)->Var()));
-  var_desc->SetName(cache_var);
-
-  for (size_t i = idx; i < cfg_->Ops().size(); ++i) {
-    auto* op = cfg_->Ops()[i];
-
-    // redirect the input to the latest version of cache_var
-    for (auto* node : op->inputs) {
-      if (node->Name() == var) {
-        ir::Node* cache_node = var_nodes_[cache_var].back();
-
-        // swap node to cache_node
-        cache_node->outputs.insert(cache_node->outputs.end(),
-                                   node->outputs.begin(), node->outputs.end());
-        PADDLE_ENFORCE(node->inputs.size() == 1 && node->inputs[0]->IsOp());
-        auto* prev_op = node->inputs[0];
-        std::replace(prev_op->outputs.begin(), prev_op->outputs.end(), node,
-                     cache_node);
-        for (auto* next_op : node->outputs) {
-          std::replace(next_op->inputs.begin(), next_op->inputs.end(), node,
-                       cache_node);
-        }
-
-        // erase unused node
-        auto& nodes = var_nodes_.at(var);
-        nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end());
-        graph->RemoveNode(node);
-      }
-    }
-
-    // if we need to rename the output,
-    // always create a newer version of cache_var
-    for (auto* node : op->outputs) {
-      if (node->Name() == var) {
-        ir::Node* cache_node = graph->CreateVarNode(var_desc.get());
-        var_nodes_[cache_var].emplace_back(cache_node);
-
-        // swap node to cache node
-        cache_node->outputs.insert(cache_node->outputs.end(),
-                                   node->outputs.begin(), node->outputs.end());
-        cache_node->inputs.emplace_back(op);
-        std::replace(op->outputs.begin(), op->outputs.end(), node, cache_node);
-        for (auto* next_op : node->outputs) {
-          std::replace(next_op->inputs.begin(), next_op->inputs.end(), node,
-                       cache_node);
-        }
-
-        // erase unused node
-        auto& nodes = var_nodes_.at(var);
-        nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end());
-        graph->RemoveNode(node);
-      }
-    }
-  }
-}
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
-
-REGISTER_PASS(memory_optimize_pass, paddle::framework::ir::MemoryOptimizePass)
-    .RequireGraphAttr(paddle::framework::details::kStaleProgramOpDescs);
--- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.h
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.h
@ -1,72 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <list>
-#include <map>
-#include <memory>
-#include <set>
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h"
-#include "paddle/fluid/framework/ir/pass.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-class MemoryOptimizePass : public ir::Pass {
- protected:
-  void ApplyImpl(ir::Graph* graph) const override;
-  // fill the variable map(var_nodes) by version.
-  void InitSSAGraphNodes() const;
-
- private:
-  // update program descs
-  void RenameVarInGraphDesc(const std::string& var,
-                            const std::string& cache_var, size_t idx) const;
-  // update ir nodes
-  void RenameVarInGraphNode(const std::string& var,
-                            const std::string& cache_var, size_t idx,
-                            ir::Graph* graph) const;
-
-  void SubGraphOptimize(OpDesc* op_desc) const;
-  // 1. scan op with subblock and collect the output/input vars.
-  // while, while_grad, conditional_block
-  // 2. scan distributed ops and collect the output/input vars
-  // 3. op_role_vars
-  void CollectSkipVarsSet(ir::Graph* graph) const;
-
- private:
-  // Reuse Node Pool, Owned.
-  mutable OrderedSet pool_;
-  // controlflow Graph
-  mutable std::unique_ptr<ControlFlowGraph> cfg_;
-  // skip set
-  mutable std::unordered_set<std::string> skip_set_;
-  // var nodes
-  mutable std::map<std::string, std::vector<ir::Node*>> var_nodes_;
-};
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
--- a/paddle/fluid/framework/ir/memory_optimize_pass/record_skip_memory_opt_vars_pass.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/record_skip_memory_opt_vars_pass.cc
@ -1,170 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <string>
-#include <unordered_set>
-#include <vector>
-#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h"
-#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/framework/op_proto_maker.h"
-#include "paddle/fluid/framework/operator.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-class RecordSkipMemoryOptVarsPass : public ir::Pass {
- protected:
-  void ApplyImpl(ir::Graph* graph) const override {
-    PADDLE_ENFORCE(!graph->Has(kMemOptSkipVars));
-    graph->Set(kMemOptSkipVars, new MemOptSkipVars);
-    auto& skip_vars = graph->Get<MemOptSkipVars>(kMemOptSkipVars);
-
-    std::vector<ir::Node*> op_nodes;
-    for (auto& node : graph->Nodes()) {
-      PADDLE_ENFORCE_NOT_NULL(node, "The node should not be nullptr.");
-      if (node->IsOp() && node->Op()) {
-        op_nodes.emplace_back(node);
-      }
-    }
-
-    // Insert kEmptyVarName to avoid optimizing empty variable
-    skip_vars.insert(framework::kEmptyVarName);
-
-    // NOTE(zcd): Insert OpRoleVars to SkipVarSet to prevent the vars are rename
-    // in memory optimize pass.
-    InsertOpRoleVarsToSkipVarSet(op_nodes, &skip_vars);
-
-    InsertSkipMemOptOpInOutToSkipVarSet(op_nodes, &skip_vars);
-  }
-
- private:
-  static void InsertOpRoleVarsToSkipVarSet(const std::vector<ir::Node*>& ops,
-                                           MemOptSkipVars* skip_vars) {
-    for (auto& node : ops) {
-      try {
-        auto op_role_vars =
-            boost::get<std::vector<std::string>>(node->Op()->GetNullableAttr(
-                OpProtoAndCheckerMaker::OpRoleVarAttrName()));
-        PADDLE_ENFORCE_EQ(op_role_vars.size() % 2, 0);
-        for (size_t i = 0; i < op_role_vars.size(); i += 2) {
-          auto& g_name = op_role_vars[i + 1];
-          skip_vars->insert(g_name);
-        }
-      } catch (boost::bad_get& e) {
-      }
-    }
-  }
-
-  static void UpdateSkipVarSet(
-      MemOptSkipVars* skip_vars,
-      const std::vector<std::vector<std::string>>& var_names) {
-    for (auto& var_name : var_names) {
-      skip_vars->insert(var_name.begin(), var_name.end());
-    }
-  }
-
-  static std::vector<std::string> ToGradVarName(
-      const std::vector<std::string>& names) {
-    std::vector<std::string> ret;
-    ret.reserve(names.size());
-    for (auto& name : names) {
-      if (name != framework::kEmptyVarName) {
-        ret.emplace_back(framework::GradVarName(name));
-      }
-    }
-    return ret;
-  }
-
-  static void InsertSkipMemOptOpInOutToSkipVarSet(
-      const std::vector<ir::Node*>& ops, MemOptSkipVars* skip_vars) {
-    static std::unordered_set<std::string> kSkipMemOptOps{
-        "send", "recv", "prefetch", "send_barrier", "fetch_barrier"};
-
-    for (auto& node : ops) {
-      auto* op_desc = node->Op();
-      // Some ops (while, conditional_block, recurrent, etc.) have sub-blocks.
-      // These ops often use variables from its parent or forward blocks.
-      // Optimizing in/out of such ops would make these variables cannot
-      // be found when running sub-block ops.
-      if (OpHasSubBlock(op_desc)) {
-        UpdateSkipVarSet(skip_vars, {op_desc->InputArgumentNames(),
-                                     op_desc->OutputArgumentNames()});
-      }
-
-      // Skip ops that are related to parameter server.
-      // In distributed mode, trainers and parameter server use same
-      // variable names to track same variables. We cannot change the
-      // names of these variables, otherwise trainers or parameter
-      // server would not find them.
-      if (kSkipMemOptOps.count(op_desc->Type()) > 0) {
-        UpdateSkipVarSet(skip_vars, {op_desc->InputArgumentNames(),
-                                     op_desc->OutputArgumentNames()});
-      }
-
-      // FIXME(zjl): some ops use variables that are not from their
-      // inputs or outputs. We do not have a nice method to solve this
-      // issue yet. Currently, we should skip these variables when
-      // memory optimization is enabled.
-      auto op_type = op_desc->Type();
-      if (op_type == "while_grad") {
-        // In while_grad, framework::GradVarName(Input("X")) is visited
-        // without being any in/out of while_grad. While_grad uses
-        // these variable to accumulate gradient of X across time steps.
-        UpdateSkipVarSet(skip_vars, {ToGradVarName(op_desc->Input("X"))});
-      } else if (op_type == "conditional_block_grad") {
-        // In conditional_block_grad, framework::GradVarName(Input("Input",
-        // "Cond")) is visited without being any in/out of
-        // conditional_block_grad. Conditional_block_grad uses these
-        // variables to accumulate gradient of Input/Cond across time steps.
-        UpdateSkipVarSet(skip_vars, {ToGradVarName(op_desc->Input("Input")),
-                                     ToGradVarName(op_desc->Input("Cond"))});
-      } else if (op_type == "recurrent" || op_type == "recurrent_grad") {
-        // Recurrent and recurrent_grad ops are implemented by a very trickly
-        // way. Attr("states", "ex_states") is visited without being any
-        // in/out of op. It is because these variables are from sub blocks,
-        // not main block. Adding these variables to input would make recurrent
-        // fail since "states" and "ex_states" cannot be found in main block.
-        // When memory optimization is enabled, "states", "ex_states" and their
-        // gradient should be skipped.
-        auto ex_states =
-            boost::get<std::vector<std::string>>(op_desc->GetAttr("ex_states"));
-        auto states =
-            boost::get<std::vector<std::string>>(op_desc->GetAttr("states"));
-        if (op_type == "recurrent") {
-          UpdateSkipVarSet(skip_vars, {ex_states, states});
-        } else {
-          // In recurrent_grad, framework::GradVarName(Input("parameters",
-          // "input")) is visited without being any in/out of recurrent_grad.
-          // Recurrent_grad uses these variables to accumulate gradient of
-          // parameters/input across time steps.
-          UpdateSkipVarSet(
-              skip_vars,
-              {ToGradVarName(op_desc->Input("parameters")),
-               ToGradVarName(op_desc->Input("inputs")), ex_states, states,
-               ToGradVarName(ex_states), ToGradVarName(states)});
-        }
-      }
-    }
-  }
-};
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
-
-REGISTER_PASS(record_skip_memory_opt_vars_pass,
-              paddle::framework::ir::RecordSkipMemoryOptVarsPass);
--- a/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc
+++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc
@ -17,7 +17,6 @@
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h"
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/framework/op_proto_maker.h"

--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@ -252,7 +252,22 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
    VLOG(10) << "buffer_shared_inplace_pass Applied";
  }

-  if (build_strategy_.memory_optimize_) {
+  /**
+   * NOTE(zengjinle): If BuildStrategy.memory_optimize = None in Python,
+   * set BuildStrategy.memory_optimize according to whether gc is enabled.
+   * If gc is enabled, BuildStrategy.memory_optimize = False.
+   * If gc is disabled, BuildStrategy.memory_optimize = True.
+   * This is because gc+memory_optimize is worse than gc only.
+   *
+   * As an option, users can forcibly enable BuildStrategy.memory_optimize
+   * by setting it to True, and forcibly disable it by setting it to False.
+   */
+  bool is_gc_enabled = (GetEagerDeletionThreshold() >= 0);
+  if (!build_strategy_.memory_optimize_) {
+    build_strategy_.memory_optimize_ = !is_gc_enabled;
+  }
+
+  if (build_strategy_.memory_optimize_.get()) {
    auto cross_op_memory_reuse_pass = ir::PassRegistry::Instance().Get(
        "buffer_shared_cross_op_memory_reuse_pass");
    cross_op_memory_reuse_pass->SetNotOwned(ir::kMemOptVarInfoMapList,
@ -265,7 +280,7 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
    VLOG(10) << "buffer_shared_cross_op_memory_reuse_pass Applied";
  }

-  if (GetEagerDeletionThreshold() < 0) {
+  if (!is_gc_enabled) {
    return graph;
  }
  size_t max_memory_size = static_cast<size_t>(GetEagerDeletionThreshold());
@ -313,6 +328,9 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
    eager_deletion_pass->SetNotOwned(ir::kAllPlaces, &places_);
    graph = eager_deletion_pass->Apply(graph);
    VLOG(10) << "EagerDeletionPass Applied";
+    LOG(INFO) << "Garbage collection strategy is enabled, when "
+              << "FLAGS_eager_delete_tensor_gb = "
+              << (static_cast<double>(GetEagerDeletionThreshold()) / (1 << 30));
  }
  return graph;
 }
--- a/paddle/fluid/pybind/const_value.cc
+++ b/paddle/fluid/pybind/const_value.cc
@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/pybind/const_value.h"
-#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
@ -34,7 +33,6 @@ void BindConstValue(pybind11::module* m) {
  m->def("kControlDepVarName",
         [] { return framework::ir::Node::kControlDepVarName; });
  m->def("kNewGradSuffix", [] { return framework::kNewGradSuffix; });
-  m->def("kMemOptSkipVars", [] { return framework::ir::kMemOptSkipVars; });

  auto op_proto_and_checker_maker =
      m->def_submodule("op_proto_and_checker_maker");
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@ -1548,17 +1548,31 @@ All parameter, weight, gradient are variables in Paddle.
                )DOC")
      .def_property(
          "memory_optimize",
-          [](const BuildStrategy &self) { return self.memory_optimize_; },
-          [](BuildStrategy &self, bool b) { self.memory_optimize_ = b; },
-          R"DOC(The type is BOOL, memory opitimize aims to save total memory
+          [](const BuildStrategy &self) -> py::object {
+            if (self.memory_optimize_) {
+              return py::cast(self.memory_optimize_.get());
+            } else {
+              return py::cast(nullptr);
+            }
+          },
+          [](BuildStrategy &self, const py::handle &value) {
+            auto *py_obj = value.ptr();
+            if (py_obj == nullptr || py_obj == Py_None) {
+              self.memory_optimize_ = boost::none;
+            } else if (PyBool_Check(py_obj)) {
+              self.memory_optimize_ = (py_obj == Py_True);
+            } else {
+              PADDLE_THROW(
+                  "BuildStrategy.memory_optimize must be None, False or True");
+            }
+          },
+          R"DOC(The type is BOOL or None, memory optimize aims to save total memory
                consumption, set to True to enable it.

-                Memory Optimize is our experimental feature, some variables
-                may be reused/removed by optimize strategy. If you need to
-                fetch some variable values when using this feature, please
-                set the persistable property of the variables to True.
-
-                Default False)DOC")
+                Default None. None means the framework chooses automatically
+                whether to use this strategy. Currently, None means that it is
+                enabled when GC is disabled, and disabled when GC is enabled.
+                True means enabling and False means disabling.)DOC")
      .def_property(
          "is_distribution",
          [](const BuildStrategy &self) { return self.is_distribution_; },
@ -1578,13 +1592,6 @@ All parameter, weight, gradient are variables in Paddle.
          "enable_inplace",
          [](const BuildStrategy &self) { return self.enable_inplace_; },
          [](BuildStrategy &self, bool b) { self.enable_inplace_ = b; })
-      .def_property("_use_legacy_memory_optimize_strategy",
-                    [](const BuildStrategy &self) {
-                      return self.use_legacy_memory_optimize_strategy_;
-                    },
-                    [](BuildStrategy &self, bool b) {
-                      self.use_legacy_memory_optimize_strategy_ = b;
-                    })
      .def_property(
          "fuse_all_reduce_ops",
          [](const BuildStrategy &self) { return self.fuse_all_reduce_ops_; },
--- a/python/paddle/fluid/init.py
+++ b/python/paddle/fluid/init.py
@ -206,7 +206,7 @@ def __bootstrap__():
            'cudnn_exhaustive_search', 'selected_gpus', 'sync_nccl_allreduce',
            'limit_of_tmp_allocation',
            'times_excess_than_required_tmp_allocation',
-            'enable_inplace_whitelist', 'cudnn_batchnorm_spatial_persistent'
+            'cudnn_batchnorm_spatial_persistent'
        ]
    core.init_gflags([sys.argv[0]] +
                     ["--tryfromenv=" + ",".join(read_env_flags)])
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@ -533,36 +533,6 @@ class Executor(object):
            return as_numpy(arr)
        return [arr[i] for i in range(len(arr))]

-    def _check_fetch_vars_persistable(self, program, fetch_list):
-        for var in fetch_list:
-            if isinstance(var, Variable):
-                persistable = var.persistable
-            else:
-                block_num = program.desc.num_blocks()
-                persistable = None
-                var_name = cpt.to_bytes(var)
-                for i in six.moves.range(block_num):
-                    var_desc = program.desc.block(i).find_var(var_name)
-                    if var_desc:
-                        persistable = var_desc.persistable()
-                        break
-                assert persistable is not None, "Variable {} is not found".format(
-                    var)
-
-            if not persistable:
-                logging.warn("""
-     Detect that build_strategy.memory_optimize = True, but the some variables in the fetch
-     list is not persistable, you may get wrong fetched value, or an exeception may be thrown
-     about cannot find variable of the fetch list. 
-
-     TO FIX this:
-         # Sample
-         conv1 = fluid.layers.conv2d(data, 4, 5, 1, act=None) 
-         # if you need to fetch conv1, then:
-         conv1.persistable = True
-
-                 """)
-
    def run(self,
            program=None,
            feed=None,
@ -667,10 +637,6 @@ class Executor(object):
                scope=scope,
                return_numpy=return_numpy,
                use_program_cache=use_program_cache)
-        else:
-            if fetch_list and program._is_data_parallel and program._program and    \
-                    program._build_strategy._use_legacy_memory_optimize_strategy:
-                self._check_fetch_vars_persistable(program._program, fetch_list)

        program._compile(scope, self.place)
        if program._is_data_parallel:
--- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
@ -61,16 +61,13 @@ class TestSoftmaxWithXe(unittest.TestCase):

                build_strategy = fluid.BuildStrategy()
                build_strategy.enable_inplace = inplace
-                if inplace:
-                    build_strategy._use_legacy_memory_optimize_strategy = True
                prog = fluid.CompiledProgram(fluid.default_main_program(
                )).with_data_parallel(
                    build_strategy=build_strategy, places=place)

-                if inplace:
-                    fetch_list = [z_d.name, x_d.name]
-                else:
-                    fetch_list = [z_d.name, s_d.name]
+                fetch_list = [z_d.name, s_d.name]
+
+                print('Inplace is {}'.format("ON" if inplace else "OFF"))

                z, s = exe.run(prog,
                               feed={x_d.name: x,
--- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
+++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
@ -14,6 +14,7 @@

 from __future__ import print_function

+import logging
 import six
 import sys
 from collections import defaultdict, MutableSet
@ -550,8 +551,14 @@ def memory_optimize(input_program,
            fluid.memory_optimize(main_prog)

    """
-    sys.stderr.write('memory_optimize is deprecated. '
-                     'Use CompiledProgram and Executor\n')
+    logging.warn(
+        'Caution! paddle.fluid.memory_optimize() is deprecated '
+        'and not maintained any more, since it is not stable!\n'
+        'Please use the newest and stable memory optimization strategies!\n'
+        ' 1. Enable garbage collection strategy by exporting environment '
+        'variable FLAGS_eager_delete_tensor_gb=0\n'
+        ' 2. Set build_strategy.enable_inplace=True (True is the default '
+        'value) when using CompiledProgram or ParallelExecutor.\n')

    def to_name_str(var):
        if isinstance(var, Variable):