From c1fdacd4b495369db5f5bfcf2b9dc25d16a8e231 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 10:12:09 +0800
Subject: [PATCH 1/5] add imperative mode design

test=develop
---
 paddle/fluid/imperative/README.md | 148 ++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 paddle/fluid/imperative/README.md

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
new file mode 100644
index 0000000000..294c64b36a
--- /dev/null
+++ b/paddle/fluid/imperative/README.md
@@ -0,0 +1,148 @@
+# Overview
+
+Imperative Programming
+
+# Related Work
+
+## PyTorch
+https://pytorch.org/
+
+## TensorFlow Eager
+https://www.tensorflow.org/guide/eager
+
+# Design
+
+## API
+```python
+class Layer(object):
+
+  def __call__(self, inputs):
+    # build some parameters once.
+    # ...
+    return self.apply(inputs)
+
+  def apply(self, inputs):
+    # forward logic with paddle operators. backward auto-generated.
+
+
+class PyLayer(core.PyLayer):
+
+  def __call__(self, inputs):
+    # trace the logic.
+
+  @staticmethod
+  def forward(inputs):
+    # any forward logic implemented with numpy io.
+
+  @static method
+    # any backward logic implemented with numpy io.
+```
+
+
+## Tracer
+
+Python Variable -> C++ VarBase -> C++ Variable -> C++ Tensor
+
+
+```cpp
+class Tracer {
+ public:
+  explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
+
+  virtual ~Tracer() {}
+
+  void Trace(OpBase* op,
+             const std::map<std::string, std::vector<VarBase*>>& inputs,
+             const std::map<std::string, std::vector<VarBase*>>& outputs,
+             framework::BlockDesc* block, const bool stop_gradient = false);
+
+  std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
+                                bool stop_gradient = false);
+};
+```
+
+## Autodiff
+
+There is plenty of existing research:
+https://autodiff-workshop.github.io/
+
+
+## Tests
+
+* All op tests run once in static graph, once in imperative mode.
+
+## Refactor
+
+* All function layers with parameters converted to class Layers.
+* Models converted to imperative mode.
+
+
+# Examples
+
+```python
+class MyLayer(fluid.imperative.Layer):
+    def __init__(self):
+        super(MyLayer, self).__init__()
+
+    def forward(self, inputs):
+        x = fluid.layers.relu(inputs)
+        x = fluid.layers.elementwise_mul(x, x)
+        x = fluid.layers.reduce_sum(x)
+        return [x]
+
+
+class MyPyLayer(fluid.imperative.PyLayer):
+    def __init__(self):
+        super(MyPyLayer, self).__init__()
+
+    @staticmethod
+    def forward(inputs):
+        return np.tanh(inputs[0])
+
+    @staticmethod
+    def backward(inputs):
+        inp, out, dout = inputs
+        return np.array(dout) * (1 - np.square(np.array(out)))
+
+
+class MLP(fluid.imperative.Layer):
+    def __init__(self):
+        super(MLP, self).__init__()
+        self._fc1 = FC(3,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+        self._fc2 = FC(4,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+
+    def forward(self, inputs):
+        x = self._fc1(inputs)
+        x = self._fc2(x)
+        x = fluid.layers.reduce_sum(x)
+        return x
+
+
+np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+with fluid.imperative.guard():
+    var_inp = fluid.imperative.base.to_variable(np_inp)
+    mlp = MLP()
+    out = mlp(var_inp)
+    dy_out = out._numpy()
+    out._backward()
+```
+
+# Plan
+
+2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
+
+4.1, 4 fulltime: can run 6 models at 70% of PyTorch performance. Release alpha.
+
+6.1, 5 fulltime: performance close to PyTorch, can run on multiple devices. Release beta.
+
+8.1, 5 fulltime: works in general. Convert current models to imperative mode.
+
+12.1, 5 fulltime: can compile to a static graph and support more optimizations.
+
+# Discussion
+
+TODO.
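
Note: the Tracer and Autodiff sections in the patch above describe a tape: record every forward op, then replay the records in reverse to produce gradients. Below is a minimal, self-contained Python sketch of that idea for reviewers; `Tape` and `TracedVar` are names invented here for illustration, not part of the proposed Paddle API.

```python
import numpy as np

class Tape:
    """Records (backward_fn, output) pairs during the forward pass."""
    def __init__(self):
        self.entries = []

class TracedVar:
    def __init__(self, value, tape):
        self.value = np.asarray(value, dtype=np.float32)
        self.grad = np.zeros_like(self.value)
        self.tape = tape

    def __mul__(self, other):
        out = TracedVar(self.value * other.value, self.tape)
        def backward(dout):
            # d(x*y)/dx = y, d(x*y)/dy = x
            self.grad += dout * other.value
            other.grad += dout * self.value
        self.tape.entries.append((backward, out))
        return out

def run_backward(root):
    """Seed the root gradient with ones, then replay the tape in reverse."""
    root.grad = np.ones_like(root.value)
    for fn, out in reversed(root.tape.entries):
        fn(out.grad)

tape = Tape()
x = TracedVar([2.0, 3.0], tape)
y = x * x          # forward op is recorded on the tape
run_backward(y)    # reverse pass replays the records
print(x.grad)      # [4. 6.], i.e. 2 * x
```

The design above records in C++ (`Tracer::Trace`) rather than in Python, but the control flow is the same: trace forward, walk the trace backward.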
From f997109bb1486d3aa9cfb027729d9a9c02340382 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 10:18:08 +0800
Subject: [PATCH 2/5] polish

---
 paddle/fluid/imperative/README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index 294c64b36a..89543da752 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -21,8 +21,7 @@ class Layer(object):
     # build some parameters once.
     # ...
-    return self.apply(inputs)
-
-  def apply(self, inputs):
+    return self.forward(inputs)
+  def forward(self, inputs):
     # forward logic with paddle operators. backward auto-generated.
 
 
@@ -35,7 +34,8 @@ class PyLayer(core.PyLayer):
   def forward(inputs):
     # any forward logic implemented with numpy io.
 
-  @static method
+  @staticmethod
+  def backward(inputs):
     # any backward logic implemented with numpy io.
 ```
 
@@ -67,7 +67,6 @@ class Tracer {
 
 There is plenty of existing research:
 https://autodiff-workshop.github.io/
-
 
 ## Tests
 
@@ -131,6 +129,7 @@ class MLP(fluid.imperative.Layer):
     out._backward()
 ```
 
+
 # Plan
 
 2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
@@ -143,6 +142,7 @@ class MLP(fluid.imperative.Layer):
 
 12.1, 5 fulltime: can compile to a static graph and support more optimizations.
 
+
 # Discussion
 
 TODO.

From 783dbe9abbf72b3a5460ee44f057b39051294a52 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 10:37:42 +0800
Subject: [PATCH 3/5] more doc

test=develop
---
 paddle/fluid/imperative/README.md | 51 +++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index 89543da752..adabb5b0a5 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -1,6 +1,6 @@
 # Overview
 
-Imperative Programming
+Imperative programming is easier to learn, debug, and use to try new ideas.
 
 # Related Work
 
@@ -37,12 +37,38 @@ class PyLayer(core.PyLayer):
   @staticmethod
   def backward(inputs):
     # any backward logic implemented with numpy io.
+
+
+
 ```
 
 
 ## Tracer
 
-Python Variable -> C++ VarBase -> C++ Variable -> C++ Tensor
+Current: Python Variable -> C++ VarBase -> C++ Variable -> C++ Tensor
+
+Longer term:
+```python
+
+# Parent class.
+class PyVarBase(object):
+  pass
+
+# Current python variable.
+class Variable(PyVarBase):
+  pass
+
+class IVariable(PyVarBase):
+  def __init__(self):
+    self._ivar = core.VarBase()
+
+  def to(self, device): pass
+  def value(self): pass
+  def backward(self): pass
+  def gradient_value(self): pass
+  # operators to override.
+```
 
 
 ```cpp
@@ -62,10 +88,21 @@ class Tracer {
 };
 ```
 
+* Trace forward operations.
+* Perform simple Python-level infer and return to user.
+* Perform autograd to generate gradients.
+* Clear trace.
+* Apply gradients with optimizers.
+
 ## Autodiff
 
 There is plenty of existing research:
 https://autodiff-workshop.github.io/
+https://en.wikipedia.org/wiki/Automatic_differentiation
+
+## Execution Engine
+
+Lazy execution of pushed C++ operations.
 
 ## Tests
 
@@ -76,7 +113,6 @@ https://autodiff-workshop.github.io/
 
 * All function layers with parameters converted to class Layers.
 * Models converted to imperative mode.
-
 
 # Examples
 
@@ -125,6 +161,10 @@ class MLP(fluid.imperative.Layer):
     out._backward()
 ```
 
 
+## Save/Load Models
+
+TODO
+
 # Plan
 
@@ -135,11 +175,11 @@ class MLP(fluid.imperative.Layer):
 
 4.1, 4 fulltime: can run 6 models at 70% of PyTorch performance. Release alpha.
 
 6.1, 5 fulltime: performance close to PyTorch, can run on multiple devices. Release beta.
 
-8.1, 5 fulltime: works in general. Convert current models to imperative mode.
-
-12.1, 5 fulltime: can compile to a static graph and support more optimizations.
+8.1, 5 fulltime: works in general. Update existing models. Can compile to a static graph and support more optimizations.
+
+12.1: done.
 
 
 # Discussion
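
Note: the Execution Engine section added above is a single line, "Lazy execution of pushed C++ operations." One way to read it, sketched under that assumption: ops are pushed onto a queue and nothing runs until a result is actually needed. `LazyEngine` and its methods are invented names for illustration, not the proposed engine.

```python
import numpy as np

class LazyEngine:
    def __init__(self):
        self.pending = []          # pushed but not yet executed ops

    def push(self, fn, *args):
        result = {}                # placeholder, filled in on flush
        self.pending.append((fn, args, result))
        return result

    def flush(self):
        # Execute every queued op in push order, resolving placeholders.
        for fn, args, result in self.pending:
            vals = [a["value"] if isinstance(a, dict) else a for a in args]
            result["value"] = fn(*vals)
        self.pending.clear()

engine = LazyEngine()
a = engine.push(np.add, np.ones(2), np.ones(2))   # nothing runs yet
b = engine.push(np.multiply, a, 3.0)              # still nothing
engine.flush()                                    # both ops run now
print(b["value"])                                 # [6. 6.]
```

Queueing ops before running them is what gives an engine room to fuse, reorder, or batch kernels; eager per-op dispatch would give up that freedom.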
From d7b159355c02b336895531ea2b8a439727d988bf Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 15:46:06 +0800
Subject: [PATCH 4/5] add more doc

test=develop
---
 paddle/fluid/imperative/README.md | 33 ++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index adabb5b0a5..c23a779048 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -38,8 +38,6 @@ class PyLayer(core.PyLayer):
   def backward(inputs):
     # any backward logic implemented with numpy io.
 
-
-
 ```
 
 
@@ -62,9 +60,13 @@ class IVariable(PyVarBase):
   def __init__(self):
     self._ivar = core.VarBase()
 
+  # Move var to a device.
   def to(self, device): pass
+  # Get var value.
   def value(self): pass
+  # Trigger backward.
   def backward(self): pass
+  # Get var's gradient value.
   def gradient_value(self): pass
   # operators to override.
 ```
@@ -100,18 +102,22 @@ ## Autodiff
 https://autodiff-workshop.github.io/
 https://en.wikipedia.org/wiki/Automatic_differentiation
 
-## Execution Engine
+Basically, trace the forward execution and perform autodiff
+when needed.
 
-Lazy execution of pushed C++ operations.
+* Can be triggered by `backward()`.
+* Can select a block of code to trace and autodiff.
+* Use `require_grad` to drop forward subgraphs that don't need autodiff.
 
-## Tests
+## Execution Engine
 
-* All op tests run once in static graph, once in imperative mode.
+Lazy execution of pushed C++ operations.
 
 ## Refactor
 
 * All function layers with parameters converted to class Layers.
-* Models converted to imperative mode.
+* Existing models converted to imperative mode.
+* All op tests run once in static graph, once in imperative mode.
 
 # Examples
@@ -137,7 +143,17 @@ class MyPyLayer(fluid.imperative.PyLayer):
     @staticmethod
     def backward(inputs):
         inp, out, dout = inputs
         return np.array(dout) * (1 - np.square(np.array(out)))
 
 
+np_inp = np.ones([2, 2], np.float32)
+with fluid.imperative.guard():
+    var_inp = fluid.imperative.base.to_variable(np_inp)
+    my_py_layer = MyPyLayer()
+    outs = my_py_layer(var_inp)
+    dy_out = np.sum(outs[0]._numpy())
+    outs[0]._backward()
+    dy_grad = var_inp._gradient()
+
+
 class MLP(fluid.imperative.Layer):
@@ -171,6 +187,10 @@ class MLP(fluid.imperative.Layer):
 ## Save/Load Models
 
 TODO
 
+## I/O
+
+TODO
+
 # Plan
 
 2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
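
Note: patch 4 says `require_grad` is used to drop forward subgraphs that don't need autodiff. A minimal sketch of that pruning rule, reusing the tape idea from the earlier note; `Var`, `mul`, and the module-level `tape` are illustrative only, not the proposed API.

```python
import numpy as np

class Var:
    def __init__(self, value, require_grad=False):
        self.value = np.asarray(value, dtype=np.float32)
        self.require_grad = require_grad
        self.grad = np.zeros_like(self.value)

tape = []

def mul(a, b):
    out = Var(a.value * b.value,
              require_grad=a.require_grad or b.require_grad)
    if out.require_grad:
        # Record the op only when some input needs gradients.
        def backward(dout):
            if a.require_grad:
                a.grad += dout * b.value
            if b.require_grad:
                b.grad += dout * a.value
        tape.append((backward, out))
    return out

x = Var([2.0], require_grad=True)
c = Var([5.0])             # constant subgraph: no autodiff needed
y = mul(mul(c, c), x)      # mul(c, c) is never recorded on the tape
y.grad = np.ones_like(y.value)
for fn, out in reversed(tape):
    fn(out.grad)
print(len(tape), x.grad)   # 1 [25.]; d(25*x)/dx = 25
```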
From 6b762f65192ae8a3c35a9a01a1719c3e9402225f Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 15:52:39 +0800
Subject: [PATCH 5/5] add doc

test=develop
---
 paddle/fluid/imperative/README.md | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index c23a779048..4c4d619b35 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -91,7 +91,7 @@ class Tracer {
 ```
 
 * Trace forward operations.
-* Perform simple Python-level infer and return to user.
+* Perform quick shape/type inference, push kernels to the execution engine, and return to the user.
 * Perform autograd to generate gradients.
 * Clear trace.
 * Apply gradients with optimizers.
@@ -113,6 +113,20 @@ when needed.
 ## Execution Engine
 
 Lazy execution of pushed C++ operations.
 
+## Device Placement
+
+* An operator executes on its inputs' device.
+* All inputs should live on the same device.
+* Use `Var.to()` to explicitly move a var to a device.
+
+## Save/Load Models
+
+TODO
+
+## I/O
+
+TODO
+
 ## Refactor
@@ -181,15 +195,6 @@ class MLP(fluid.imperative.Layer):
     out._backward()
 ```
 
-
-## Save/Load Models
-
-TODO
-
-## I/O
-
-TODO
-
 # Plan
 
 2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
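
Note: the Device Placement rules in the last patch (an op runs on its inputs' device, all inputs must agree, `Var.to()` moves a var) can be shown with a small sketch. `DeviceVar` and `elementwise_add` are stand-ins invented here, not Paddle APIs, and the device strings are hypothetical.

```python
import numpy as np

class DeviceVar:
    def __init__(self, value, device="cpu"):
        self.value = np.asarray(value, dtype=np.float32)
        self.device = device

    def to(self, device):
        # A real framework would copy the tensor across devices here.
        return DeviceVar(self.value.copy(), device)

def elementwise_add(a, b):
    # Rule 2: all inputs should live on the same device.
    if a.device != b.device:
        raise ValueError(
            "inputs on different devices: %s vs %s" % (a.device, b.device))
    # Rule 1: the op executes on its inputs' device.
    return DeviceVar(a.value + b.value, a.device)

x = DeviceVar([1.0, 2.0], device="cpu")
y = DeviceVar([3.0, 4.0], device="gpu:0")
z = elementwise_add(x, y.to("cpu"))   # Rule 3: explicit move, as with Var.to()
print(z.value, z.device)              # [4. 6.] cpu
```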