|
|
|
@ -76,6 +76,18 @@ class StrategyCompiler(StrategyCompilerBase):
|
|
|
|
|
opt._disable_strategy(valid_strategy)
|
|
|
|
|
return valid_strategy
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
Meta Optimizer Type A: rewrite forward, backward. e.g. recompute, async, sync, pipeline.
|
|
|
|
|
results will be split in async, sync, pipeline
|
|
|
|
|
Meta Optimizer Type B: rewrite forward,
|
|
|
|
|
e.g. AMP and the corresponding backward is generated by rewritten forward
|
|
|
|
|
Meta Optimizer Type C: rewrite backward. e.g. gradient fusion
|
|
|
|
|
Meta Optimizer Type D: rewrite optimize. e.g. lars, lamb, localsgd, gradient merge, dgc
|
|
|
|
|
Meta Optimizer Type E: only transpile to Graph structure for runtime,
|
|
|
|
|
currently, grad fusion and kernel fusion, sync batch-norm included.
|
|
|
|
|
we will remove grad fusion and sync batch-norm
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def generate_optimizer(self, loss, role_maker, optimizer,
|
|
|
|
|
user_defined_strategy, meta_optimizer_list,
|
|
|
|
|
graph_optimizer_list):
|
|
|
|
|