@@ -25,45 +25,22 @@ namespace paddle {
namespace inference {
namespace analysis {

/*
 * Memory optimization pass for inference with pre-analysis of memory usage,
 * without GC.
 * Different from training, the inference memory reuse strategy doesn't
 * include GC, because that overhead is too high when the batch size is one.
 *
 * The inference memory reuse tries to pre-determine the tensor reusing
 * strategy without any runtime overhead.
 *
 * To improve the strategy's performance, a warm-up run is introduced:
 *   - Before officially deploying the inference program, one should warm it
 *     up to generate a runtime cache:
 *   - Run the inference program with several batches of data; it will persist
 *     some runtime information about the memory of tensors to disk. We call
 *     this information the memory reusing cache.
 *   - With the memory reusing cache, the user can deploy the inference as a
 *     service. Before running the model, the inference program loads the
 *     memory cache, analyzes it, generates the best memory reusing strategy,
 *     and adjusts the execution of the network accordingly.
 *
 * With the warm-up and memory reusing cache design, the memory reusing
 * algorithm can analyze the real memory consumption of the tensors, even with
 * flexible LoDTensors and special shape-changing operators such as
 * sequence-pooling.
 */
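// Illustrative warm-up flow (a hedged sketch: API names such as
// AnalysisConfig, SetModel, EnableMemoryOptim and CreatePaddlePredictor refer
// to the public inference API and may differ between versions; model_dir,
// warmup_batches and outputs are hypothetical):
//
//   AnalysisConfig config;
//   config.SetModel(model_dir);
//   config.EnableMemoryOptim();           // turn this pass on
//   auto predictor = CreatePaddlePredictor(config);
//   for (auto &batch : warmup_batches) {  // a few representative batches
//     predictor->Run(batch, &outputs);    // persists the memory reusing cache
//   }
//   // Later deployments reload the cache (see GetMemoryCachePath below) and
//   // apply the pre-computed reuse strategy.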
/* Memory optimization.
 * We will perform the following operations:
 * 1. Collect every variable's lifetime.
 * 2. Make a reuse plan: variables can be reused if there is no overlap (in
 *    lifetime) between them.
 *    The final plan is a mapping table in which the key is the original name
 *    of a variable and the value is its current name after reuse.
 * 3. Perform the reuse plan: replace all variable names in the model
 *    according to the mapping table.
 */
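// Illustrative example of the reuse table (added for clarity; variable names
// and lifetimes are made up): with lifetimes expressed as op-index intervals,
// a:[0, 2], c:[1, 4], b:[3, 5], the lifetimes of a and b do not overlap, so
// the plan may map {"b" -> "a"} (b reuses a's memory), while c overlaps both
// and keeps its own buffer.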
class MemoryOptimizePass : public AnalysisPass {
 public:
  using space_table_t = std::unordered_map<std::string, size_t>;
  using lifecycle_t = std::pair<int, int>;

  struct MemoryAllocation {
    size_t allocated;  // allocated memory in bytes.
    size_t saved;      // saved memory in bytes.
    int sort_kind;     // the kind of the corresponding sorting algorithm.

    // Get the memory saving ratio of temporary variables.
    float GetSavingRatio() const;
  };

  virtual ~MemoryOptimizePass() = default;

 protected:
@@ -75,24 +52,6 @@ class MemoryOptimizePass : public AnalysisPass {
      int sort_kind) const;

  void CollectVarMemorySize(space_table_t *space_table) const;
  void CollectVarMemorySize0(space_table_t *space_table) const;

  void CollectVarMemorySize(
      const std::unordered_map<std::string, size_t> &batch_var_ave_dim,
      std::unordered_map<std::string, framework::ir::Node *> *tensor_nodes,
      space_table_t *space_table) const;

  // Builds the reuse plan; the resulting statistics (from which the saved
  // memory percentage is computed) are written to *memory_allocation.
  void MakeReusePlan(
      const std::vector<std::unordered_set<std::string>> &var_clusters,
      const std::unordered_map<std::string, size_t> &var_batch_ave_size,
      const space_table_t &space_table,
      std::unordered_map<std::string, std::string> *reuse_table, int sort_kind,
      MemoryAllocation *memory_allocation) const;

  void PerformReusePlan(
      const std::unordered_map<std::string, std::string> &reuse_table,
      int sort_kind, std::unordered_set<std::string> *vars2remove) const;

 public:
  std::string repr() const override;
@@ -102,12 +61,6 @@ class MemoryOptimizePass : public AnalysisPass {
  mutable int max_lifecycle_{-1};
};
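
// A minimal illustrative helper, added as a hedged sketch rather than part of
// the original pass: step 2 of the plan above lets two variables share memory
// only when their lifetimes, i.e. their inclusive [first_op, last_op]
// intervals, are disjoint.
inline bool LifecyclesDisjoint(const MemoryOptimizePass::lifecycle_t &a,
                               const MemoryOptimizePass::lifecycle_t &b) {
  return a.second < b.first || b.second < a.first;
}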
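
// Location of the on-disk memory reusing cache written during the warm-up
// runs: it sits next to the model (or program) file with a ".memory_cache"
// suffix, e.g. GetMemoryCachePath("", "/path/__model__") yields
// "/path/__model__.memory_cache".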
static std::string GetMemoryCachePath(const std::string &model_path,
                                      const std::string &prog_path) {
  auto path = model_path.empty() ? prog_path : model_path;
  return path + ".memory_cache";
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle