[Dy2stat] Support Various-Length Return Grammar in Dy2stat (#25249)

Support Various-Length Return Grammar in Dy2stat. This PR is a follow-up of https://github.com/PaddlePaddle/Paddle/pull/25176 .

The basic idea is putting no-value placeholder variables at each `return` statement to make all `return` statements have the same length, after which the static graph can have fixed fetch outputs (code at return_transformer.py). Then we remove those no-value placeholders when we finally return the dygraph result (code at partial_program.py).

However, various-length return in the Bert model is still not supported. The dy2stat can change the code as I wish, but some ops which check shape at compile time (e.g. Reshape, MatMul) will throw an error because the no-value placeholder may not have the required shape. Does this matter? To me, those no-value placeholders will be replaced by real values meeting the shape requirements at run time, so I think the solution should be some way to handle the compile-time checking. By the way, whenever we have dynamic shapes, it often causes problems in dy2stat. We should find a way to handle this in the future.

Fixing various-length return in Bert is on my TODO list, and I will also find some other existing models for verification.
fix_copy_if_different
Huihuang Zheng 5 years ago committed by GitHub
parent de27569eca
commit 5e8e6dad72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -19,9 +19,10 @@ import logging
from paddle.fluid import log_helper from paddle.fluid import log_helper
from paddle.fluid import framework, backward, core from paddle.fluid import framework, backward, core
from paddle.fluid.dygraph import layers from paddle.fluid.dygraph import layers
from paddle.fluid.dygraph.base import switch_to_static_graph
from paddle.fluid.dygraph.dygraph_to_static.return_transformer import RETURN_NO_VALUE_MAGIC_NUM
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.fluid.layers.utils import pack_sequence_as from paddle.fluid.layers.utils import pack_sequence_as
from paddle.fluid.dygraph.base import switch_to_static_graph
import paddle.compat as cpt import paddle.compat as cpt
_logger = log_helper.get_logger( _logger = log_helper.get_logger(
@ -184,7 +185,8 @@ class PartialProgramLayer(layers.Layer):
'is_test': not self.training 'is_test': not self.training
}) })
return self._restore_out(out_vars) restored_nest_out = self._restore_out(out_vars)
return self._remove_no_value(restored_nest_out)
def _prepare(self, inputs): def _prepare(self, inputs):
""" """
@ -239,11 +241,44 @@ class PartialProgramLayer(layers.Layer):
for i, idx in enumerate(self._outputs.var_ids): for i, idx in enumerate(self._outputs.var_ids):
flatten_outputs[idx] = out_vars[i] flatten_outputs[idx] = out_vars[i]
outs = self._outputs.restore(flatten_outputs) outs = self._outputs.restore(flatten_outputs)
if len(outs) == 1: if outs is not None and len(outs) == 1:
outs = outs[0] outs = outs[0]
return outs return outs
def _is_no_value(self, var):
    """
    Return True iff `var` is a no-value placeholder: a shape-[1]
    VarBase holding the magic number inserted by the return
    transformer for various-length `return` statements.
    """
    # Anything that is not a VarBase (e.g. None, plain Python values)
    # can never be a placeholder; short-circuiting also guards the
    # .shape / .numpy() accesses below.
    return bool(
        isinstance(var, core.VarBase) and var.shape == [1] and
        var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM)
def _remove_no_value(self, out_vars):
    """
    Strip the no-value placeholders (inserted for various-length
    `return` statements) from the static-graph outputs before they
    are handed back as the dygraph result.

    A lone placeholder becomes None; a tuple/list has its
    placeholders filtered out, collapsing to a single element or
    None when the filtering actually removed something.
    """
    if isinstance(out_vars, core.VarBase):
        return None if self._is_no_value(out_vars) else out_vars

    if not isinstance(out_vars, (tuple, list)):
        # Unknown container type: pass through untouched.
        return out_vars

    kept = [var for var in out_vars if not self._is_no_value(var)]
    if isinstance(out_vars, tuple):
        kept = tuple(kept)

    # Only collapse the container when a placeholder was actually
    # removed; an originally empty or single-element result must be
    # returned as-is.
    if len(kept) < len(out_vars):
        if len(kept) == 0:
            return None
        if len(kept) == 1:
            return kept[0]
    return kept
def _set_grad_type(self, params): def _set_grad_type(self, params):
# NOTE: if user set sparse gradient mode, the param's gradient # NOTE: if user set sparse gradient mode, the param's gradient
# will be SelectedRows, not LoDTensor. But tracer will just # will be SelectedRows, not LoDTensor. But tracer will just

@ -278,8 +278,9 @@ class ConcreteProgram(object):
with param_guard(func_spec.parameters(False)), param_guard( with param_guard(func_spec.parameters(False)), param_guard(
func_spec.buffers(False)): func_spec.buffers(False)):
outputs = static_func(*inputs) outputs = static_func(*inputs)
if not isinstance(outputs, (tuple, list)): if not isinstance(outputs,
outputs = [outputs] if outputs else [] (tuple, list)) and outputs is not None:
outputs = [outputs]
return ConcreteProgram( return ConcreteProgram(
inputs=inputs, inputs=inputs,

@ -67,8 +67,9 @@ class StaticCode1():
shape=[1], dtype='bool', value=False) shape=[1], dtype='bool', value=False)
__return_0 = fluid.layers.fill_constant( __return_0 = fluid.layers.fill_constant(
shape=[1], dtype='bool', value=False) shape=[1], dtype='bool', value=False)
__return_value_0 = fluid.layers.fill_constant( __return_value_init_0 = fluid.layers.fill_constant(
shape=[1], dtype='float64', value=0.0) shape=[1], dtype='float64', value=0.0)
__return_value_0 = __return_value_init_0
def true_fn_0(x_v): def true_fn_0(x_v):
x_v = x_v - 1 x_v = x_v - 1
@ -123,8 +124,9 @@ class StaticCode2():
shape=[1], dtype='bool', value=False) shape=[1], dtype='bool', value=False)
__return_2 = fluid.layers.fill_constant( __return_2 = fluid.layers.fill_constant(
shape=[1], dtype='bool', value=False) shape=[1], dtype='bool', value=False)
__return_value_1 = fluid.layers.fill_constant( __return_value_init_1 = fluid.layers.fill_constant(
shape=[1], dtype='float64', value=0.0) shape=[1], dtype='float64', value=0.0)
__return_value_1 = __return_value_init_1
def true_fn_3(x_v): def true_fn_3(x_v):
x_v = x_v - 1 x_v = x_v - 1

@ -17,6 +17,7 @@ from __future__ import print_function
import unittest import unittest
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph import declarative from paddle.fluid.dygraph import declarative
from paddle.fluid.dygraph import ProgramTranslator from paddle.fluid.dygraph import ProgramTranslator
@ -96,6 +97,56 @@ def test_recursive_return(x):
return dyfunc_with_if_else(x) return dyfunc_with_if_else(x)
# Dy2stat fixture: the branch taken for the test input returns two values
# while the other branch returns one, exercising various-length return
# support where the LONGER return is on the executed path.
@declarative
def test_return_different_length_if_body(x):
    x = fluid.dygraph.to_variable(x)
    y = x + 1
    if x > 0:
        # x = to_variable(np.ones(1)) so it will return here
        return x, y
    else:
        return x
# Dy2stat fixture: mirror of the if-body case — here the executed path
# returns the SHORTER result (one value) and the untaken branch returns two.
@declarative
def test_return_different_length_else(x):
    x = fluid.dygraph.to_variable(x)
    y = x + 1
    if x < 0:
        return x, y
    else:
        # x = to_variable(np.ones(1)) so it will return here
        return x
# Dy2stat fixture: a function with no `return` statement at all;
# both dygraph and static mode should yield None.
@declarative
def test_no_return(x):
    x = fluid.dygraph.to_variable(x)
    y = x + 1
# Dy2stat fixture: the executed branch explicitly returns None while the
# other branch returns two variables — various-length return with None.
@declarative
def test_return_none(x):
    x = fluid.dygraph.to_variable(x)
    y = x + 1
    if x > 0:
        # x = to_variable(np.ones(1)) so it will return here
        return None
    else:
        return x, y
# Dy2stat fixture: the executed branch is a bare `return` (no value) —
# distinct from `return None` at the AST level, which is exactly what the
# return transformer must handle.
@declarative
def test_return_no_variable(x):
    x = fluid.dygraph.to_variable(x)
    y = x + 1
    if x < 0:
        return x, y
    else:
        # x = to_variable(np.ones(1)) so it will return here
        return
class TestReturnBase(unittest.TestCase): class TestReturnBase(unittest.TestCase):
def setUp(self): def setUp(self):
self.input = np.ones((1)).astype('int32') self.input = np.ones((1)).astype('int32')
@ -111,21 +162,41 @@ class TestReturnBase(unittest.TestCase):
self.program_translator.enable(False) self.program_translator.enable(False)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
res = self.dygraph_func(self.input) res = self.dygraph_func(self.input)
return res.numpy() if isinstance(res, (tuple)):
return tuple(r.numpy() for r in res)
elif isinstance(res, core.VarBase):
return res.numpy()
return res
def run_static_mode(self): def run_static_mode(self):
self.program_translator.enable(True) self.program_translator.enable(True)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
res = self.dygraph_func(self.input) res = self.dygraph_func(self.input)
return res.numpy() if isinstance(res, tuple):
return tuple(r.numpy() for r in res)
elif isinstance(res, core.VarBase):
return res.numpy()
return res
def test_transformed_static_result(self): def test_transformed_static_result(self):
static_res = self.run_static_mode()
dygraph_res = self.run_dygraph_mode() dygraph_res = self.run_dygraph_mode()
self.assertTrue( static_res = self.run_static_mode()
np.allclose(dygraph_res, static_res), if isinstance(dygraph_res, tuple):
msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res, self.assertTrue(isinstance(static_res, tuple))
static_res)) self.assertEqual(len(dygraph_res), len(static_res))
for i in range(len(dygraph_res)):
self.assertTrue(
np.allclose(dygraph_res[i], static_res[i]),
msg='dygraph res is {}\nstatic_res is {}'.format(
dygraph_res[i], static_res[i]))
elif isinstance(dygraph_res, np.ndarray):
self.assertTrue(
np.allclose(dygraph_res, static_res),
msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res,
static_res))
else:
self.assertEqual(dygraph_res, static_res)
class TestInsideFuncBase(TestReturnBase): class TestInsideFuncBase(TestReturnBase):
@ -159,5 +230,30 @@ class TestRecursiveReturn(TestReturnBase):
self.dygraph_func = test_recursive_return self.dygraph_func = test_recursive_return
class TestReturnDifferentLengthIfBody(TestReturnBase):
    # Executed path returns (x, y); the untaken branch returns x only.
    def init_dygraph_func(self):
        self.dygraph_func = test_return_different_length_if_body
class TestReturnDifferentLengthElse(TestReturnBase):
    # Executed path returns x only; the untaken branch returns (x, y).
    def init_dygraph_func(self):
        self.dygraph_func = test_return_different_length_else
class TestNoReturn(TestReturnBase):
    # Function body has no `return` statement; result should be None.
    def init_dygraph_func(self):
        self.dygraph_func = test_no_return
class TestReturnNone(TestReturnBase):
    # Executed path is an explicit `return None`.
    def init_dygraph_func(self):
        self.dygraph_func = test_return_none
class TestReturnNoVariable(TestReturnBase):
    # Executed path is a bare `return` with no value.
    def init_dygraph_func(self):
        self.dygraph_func = test_return_no_variable
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save