[Dy2stat] Support Various-Length Return Grammar in Dy2stat (#25249)

Support various-length return grammar in Dy2stat. This PR is a follow-up to https://github.com/PaddlePaddle/Paddle/pull/25176.

The basic idea is to append no-value placeholder variables to shorter `return` statements so that every `return` statement has the same length; the static graph can then fetch a fixed set of outputs (code in return_transformer.py). The no-value placeholders are removed when we finally return the dygraph result (code in partial_program.py).
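For illustration, a hand-written sketch of what the padding amounts to (this is not the transformer's literal output; `_no_value_placeholder` and `forward_padded` are hypothetical names, while `RETURN_NO_VALUE_MAGIC_NUM` is the real constant from return_transformer.py):

```python
import paddle.fluid as fluid
from paddle.fluid.dygraph.dygraph_to_static.return_transformer import (
    RETURN_NO_VALUE_MAGIC_NUM)


def _no_value_placeholder():
    # A variable that _is_no_value() can recognize and strip later:
    # shape [1], float64, filled with the magic number.
    return fluid.layers.fill_constant(
        shape=[1], dtype='float64', value=RETURN_NO_VALUE_MAGIC_NUM)


def forward_padded(cond, x, y):
    # The original code returned (x, y) on one branch and just x on the
    # other. After padding, both branches return two variables, so the
    # static graph always has a fixed fetch list.
    if cond:
        return x, y
    return x, _no_value_placeholder()
```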

However, various-length return in the Bert model is still not supported. Dy2stat transforms the code as intended, but ops that check shapes at compile time (e.g. Reshape, MatMul) throw errors because a no-value placeholder may not have the required shape. Does this matter? In my view it shouldn't: the no-value placeholders are replaced by real values that meet the shape requirements at run time, so the solution should be some way to relax or defer the compile-time checking. More generally, dynamic shapes frequently cause problems in dy2stat; we should find a systematic way to handle them in the future.
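A minimal illustration of this failure mode (illustrative only, not code from this PR): a shape-[1] placeholder reaching an op that validates shapes while the program is being built fails immediately, even though a correctly shaped value would be supplied at run time.

```python
import paddle.fluid as fluid

# At compile time, a no-value placeholder is just a shape-[1] variable.
x = fluid.data(name='x', shape=[1], dtype='float32')

# Reshape checks element counts while building the program, so this
# raises at compile time: 1 element cannot be reshaped to 2 x 384.
y = fluid.layers.reshape(x, shape=[2, 384])
```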

Fixing various-length return in Bert is on my TODO list, and I will also verify the feature on other existing models.

```diff
@@ -19,9 +19,10 @@ import logging
 from paddle.fluid import log_helper
 from paddle.fluid import framework, backward, core
 from paddle.fluid.dygraph import layers
+from paddle.fluid.dygraph.base import switch_to_static_graph
+from paddle.fluid.dygraph.dygraph_to_static.return_transformer import RETURN_NO_VALUE_MAGIC_NUM
 from paddle.fluid.layers.utils import flatten
 from paddle.fluid.layers.utils import pack_sequence_as
-from paddle.fluid.dygraph.base import switch_to_static_graph
 import paddle.compat as cpt

 _logger = log_helper.get_logger(
@@ -184,7 +185,8 @@ class PartialProgramLayer(layers.Layer):
                 'is_test': not self.training
             })
-        return self._restore_out(out_vars)
+        restored_nest_out = self._restore_out(out_vars)
+        return self._remove_no_value(restored_nest_out)

     def _prepare(self, inputs):
         """
```
```diff
@@ -239,11 +241,44 @@ class PartialProgramLayer(layers.Layer):
         for i, idx in enumerate(self._outputs.var_ids):
             flatten_outputs[idx] = out_vars[i]
         outs = self._outputs.restore(flatten_outputs)
-        if len(outs) == 1:
+        if outs is not None and len(outs) == 1:
             outs = outs[0]
         return outs

+    def _is_no_value(self, var):
+        if isinstance(var, core.VarBase):
+            if var.shape == [1] and var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM:
+                return True
+        return False
+
+    def _remove_no_value(self, out_vars):
+        """
+        Removes the no-value placeholders inserted for various-length
+        return statements.
+        """
+        if isinstance(out_vars, core.VarBase):
+            if self._is_no_value(out_vars):
+                return None
+            return out_vars
+        elif isinstance(out_vars, (tuple, list)):
+            if isinstance(out_vars, tuple):
+                res = tuple(
+                    var for var in out_vars if not self._is_no_value(var))
+            else:
+                # isinstance(out_vars, list)
+                res = [var for var in out_vars if not self._is_no_value(var)]
+            # has_removed distinguishes "a placeholder was removed" from
+            # out_vars that were empty or single-element to begin with;
+            # only a genuinely reduced result is collapsed below.
+            has_removed = (len(out_vars) > len(res))
+            if len(res) == 0 and has_removed:
+                return None
+            elif len(res) == 1 and has_removed:
+                return res[0]
+            return res
+
+        return out_vars
+
     def _set_grad_type(self, params):
         # NOTE: if user set sparse gradient mode, the param's gradient
         # will be SelectedRows, not LoDTensor. But tracer will just
```

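To make the collapsing rules in `_remove_no_value` concrete, here is a stand-alone pure-Python sketch, with a sentinel object standing in for the core.VarBase magic-number check:

```python
NO_VALUE = object()  # stand-in for the magic-number placeholder


def remove_no_value(out_vars):
    # Mirrors the tuple/list branch above.
    res = type(out_vars)(v for v in out_vars if v is not NO_VALUE)
    has_removed = len(out_vars) > len(res)
    if len(res) == 0 and has_removed:
        return None       # every output was a placeholder
    if len(res) == 1 and has_removed:
        return res[0]     # a padded pair collapses back to one value
    return res            # nothing removed: leave the structure alone


assert remove_no_value((1, NO_VALUE)) == 1
assert remove_no_value((NO_VALUE,)) is None
assert remove_no_value((1, 2)) == (1, 2)
assert remove_no_value((1,)) == (1,)  # single element, nothing removed
```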
```diff
@@ -278,8 +278,9 @@ class ConcreteProgram(object):
             with param_guard(func_spec.parameters(False)), param_guard(
                     func_spec.buffers(False)):
                 outputs = static_func(*inputs)
-                if not isinstance(outputs, (tuple, list)):
-                    outputs = [outputs] if outputs else []
+                if not isinstance(outputs,
+                                  (tuple, list)) and outputs is not None:
+                    outputs = [outputs]

         return ConcreteProgram(
             inputs=inputs,
```

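The `outputs is not None` test replaces a truthiness check that could misclassify falsy-but-valid outputs; in plain Python:

```python
out = 0  # a legitimate return value that happens to be falsy

# Old pattern: truthiness silently drops the output.
assert ([out] if out else []) == []

# New pattern: only None means "no output".
assert ([out] if out is not None else None) == [0]
```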
```diff
@@ -67,8 +67,9 @@ class StaticCode1():
             shape=[1], dtype='bool', value=False)
         __return_0 = fluid.layers.fill_constant(
             shape=[1], dtype='bool', value=False)
-        __return_value_0 = fluid.layers.fill_constant(
+        __return_value_init_0 = fluid.layers.fill_constant(
             shape=[1], dtype='float64', value=0.0)
+        __return_value_0 = __return_value_init_0

         def true_fn_0(x_v):
             x_v = x_v - 1
@@ -123,8 +124,9 @@ class StaticCode2():
             shape=[1], dtype='bool', value=False)
         __return_2 = fluid.layers.fill_constant(
             shape=[1], dtype='bool', value=False)
-        __return_value_1 = fluid.layers.fill_constant(
+        __return_value_init_1 = fluid.layers.fill_constant(
             shape=[1], dtype='float64', value=0.0)
+        __return_value_1 = __return_value_init_1

         def true_fn_3(x_v):
             x_v = x_v - 1
```

```diff
@@ -17,6 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 from paddle.fluid.dygraph import declarative
 from paddle.fluid.dygraph import ProgramTranslator
@@ -96,6 +97,56 @@ def test_recursive_return(x):
     return dyfunc_with_if_else(x)

+@declarative
+def test_return_different_length_if_body(x):
+    x = fluid.dygraph.to_variable(x)
+    y = x + 1
+    if x > 0:
+        # x = to_variable(np.ones(1)) so it will return here
+        return x, y
+    else:
+        return x
+
+
+@declarative
+def test_return_different_length_else(x):
+    x = fluid.dygraph.to_variable(x)
+    y = x + 1
+    if x < 0:
+        return x, y
+    else:
+        # x = to_variable(np.ones(1)) so it will return here
+        return x
+
+
+@declarative
+def test_no_return(x):
+    x = fluid.dygraph.to_variable(x)
+    y = x + 1
+
+
+@declarative
+def test_return_none(x):
+    x = fluid.dygraph.to_variable(x)
+    y = x + 1
+    if x > 0:
+        # x = to_variable(np.ones(1)) so it will return here
+        return None
+    else:
+        return x, y
+
+
+@declarative
+def test_return_no_variable(x):
+    x = fluid.dygraph.to_variable(x)
+    y = x + 1
+    if x < 0:
+        return x, y
+    else:
+        # x = to_variable(np.ones(1)) so it will return here
+        return
+
+
 class TestReturnBase(unittest.TestCase):
     def setUp(self):
         self.input = np.ones((1)).astype('int32')
@@ -111,21 +162,41 @@ class TestReturnBase(unittest.TestCase):
         self.program_translator.enable(False)
         with fluid.dygraph.guard():
             res = self.dygraph_func(self.input)
-            return res.numpy()
+            if isinstance(res, (tuple)):
+                return tuple(r.numpy() for r in res)
+            elif isinstance(res, core.VarBase):
+                return res.numpy()
+            return res

     def run_static_mode(self):
         self.program_translator.enable(True)
         with fluid.dygraph.guard():
             res = self.dygraph_func(self.input)
-            return res.numpy()
+            if isinstance(res, tuple):
+                return tuple(r.numpy() for r in res)
+            elif isinstance(res, core.VarBase):
+                return res.numpy()
+            return res

     def test_transformed_static_result(self):
-        static_res = self.run_static_mode()
         dygraph_res = self.run_dygraph_mode()
-        self.assertTrue(
-            np.allclose(dygraph_res, static_res),
-            msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res,
-                                                             static_res))
+        static_res = self.run_static_mode()
+        if isinstance(dygraph_res, tuple):
+            self.assertTrue(isinstance(static_res, tuple))
+            self.assertEqual(len(dygraph_res), len(static_res))
+            for i in range(len(dygraph_res)):
+                self.assertTrue(
+                    np.allclose(dygraph_res[i], static_res[i]),
+                    msg='dygraph res is {}\nstatic_res is {}'.format(
+                        dygraph_res[i], static_res[i]))
+        elif isinstance(dygraph_res, np.ndarray):
+            self.assertTrue(
+                np.allclose(dygraph_res, static_res),
+                msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res,
+                                                                 static_res))
+        else:
+            self.assertEqual(dygraph_res, static_res)

 class TestInsideFuncBase(TestReturnBase):
@@ -159,5 +230,30 @@ class TestRecursiveReturn(TestReturnBase):
         self.dygraph_func = test_recursive_return

+
+class TestReturnDifferentLengthIfBody(TestReturnBase):
+    def init_dygraph_func(self):
+        self.dygraph_func = test_return_different_length_if_body
+
+
+class TestReturnDifferentLengthElse(TestReturnBase):
+    def init_dygraph_func(self):
+        self.dygraph_func = test_return_different_length_else
+
+
+class TestNoReturn(TestReturnBase):
+    def init_dygraph_func(self):
+        self.dygraph_func = test_no_return
+
+
+class TestReturnNone(TestReturnBase):
+    def init_dygraph_func(self):
+        self.dygraph_func = test_return_none
+
+
+class TestReturnNoVariable(TestReturnBase):
+    def init_dygraph_func(self):
+        self.dygraph_func = test_return_no_variable
+

 if __name__ == '__main__':
     unittest.main()
```
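Taken together, the new tests exercise calls like the following usage sketch (written along the lines of the test functions above, using the fluid 1.x dygraph API; `choose_outputs` is a hypothetical name):

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import declarative


@declarative
def choose_outputs(x):
    x = fluid.dygraph.to_variable(x)
    y = x + 1
    if x > 0:
        return x, y  # this branch returns two variables
    return x         # this branch returns one


with fluid.dygraph.guard():
    # x = 1 > 0, so the two-variable branch is taken; the placeholder
    # padding and stripping are invisible to the caller.
    x_out, y_out = choose_outputs(np.ones(1).astype('int32'))
```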
