From c1fdacd4b495369db5f5bfcf2b9dc25d16a8e231 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 10:12:09 +0800
Subject: [PATCH 1/5] add imperative mode design

test=develop
---
 paddle/fluid/imperative/README.md | 148 ++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 paddle/fluid/imperative/README.md

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
new file mode 100644
index 0000000000..294c64b36a
--- /dev/null
+++ b/paddle/fluid/imperative/README.md
@@ -0,0 +1,148 @@
+# Overview
+
+Imperative Programming
+
+# Related Work
+
+## PyTorch
+https://pytorch.org/
+
+## TensorFlow Eager
+https://www.tensorflow.org/guide/eager
+
+# Design
+
+## API
+```python
+class Layer(object):
+
+  def __call__(self, inputs):
+    # build some parameters once.
+    # ...
+    return self.apply(inputs)
+
+  def apply(self, inputs):
+    # forward logic with paddle operators. backward auto-generated.
+
+
+class PyLayer(core.PyLayer):
+
+  def __call__(self, inputs):
+    # trace the logic.
+
+  @staticmethod
+  def forward(inputs):
+    # any forward logic implemented with numpy io.
+
+  @static method
+    # any backward logic implemented with numpy io.
+```
+
+
+## Tracer
+
+Python Variable -> C++ VarBase -> C++ Variable -> C++ Tensor
+
+
+```cpp
+class Tracer {
+ public:
+  explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
+
+  virtual ~Tracer() {}
+
+  void Trace(OpBase* op,
+             const std::map<std::string, std::vector<VarBase*>>& inputs,
+             const std::map<std::string, std::vector<VarBase*>>& outputs,
+             framework::BlockDesc* block, const bool stop_gradient = false);
+
+  std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
+                                bool stop_gradient = false);
+};
+```
+
+## Autodiff
+
+There is plenty of existing research:
+https://autodiff-workshop.github.io/
+
+
+## Tests
+
+* All op tests run once in static graph, once in imperative mode.
+
+## Refactor
+
+* All function layers with parameters converted to class Layers.
+* Models converted to imperative mode.
+
+
+# Examples
+
+```python
+class MyLayer(fluid.imperative.Layer):
+    def __init__(self):
+        super(MyLayer, self).__init__()
+
+    def forward(self, inputs):
+        x = fluid.layers.relu(inputs)
+        x = fluid.layers.elementwise_mul(x, x)
+        x = fluid.layers.reduce_sum(x)
+        return [x]
+
+
+class MyPyLayer(fluid.imperative.PyLayer):
+    def __init__(self):
+        super(MyPyLayer, self).__init__()
+
+    @staticmethod
+    def forward(inputs):
+        return np.tanh(inputs[0])
+
+    @staticmethod
+    def backward(inputs):
+        inp, out, dout = inputs
+        return np.array(dout) * (1 - np.square(np.array(out)))
+
+
+class MLP(fluid.imperative.Layer):
+    def __init__(self):
+        super(MLP, self).__init__()
+        self._fc1 = FC(3,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+        self._fc2 = FC(4,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+
+    def forward(self, inputs):
+        x = self._fc1(inputs)
+        x = self._fc2(x)
+        x = fluid.layers.reduce_sum(x)
+        return x
+
+
+np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+with fluid.imperative.guard():
+    var_inp = fluid.imperative.base.to_variable(np_inp)
+    mlp = MLP()
+    out = mlp(var_inp)
+    dy_out = out._numpy()
+    out._backward()
+```
+
+# Plan
+
+2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
+
+4.1, 4 fulltime: can run 6 models at 70% of PyTorch performance. Release alpha.
+
+6.1, 5 fulltime: performance close to PyTorch, can run on multiple devices. Release beta.
+
+8.1, 5 fulltime: works in general. Convert current models to imperative mode.
+
+12.1, 5 fulltime: can compile to a static graph and support more optimizations.
+
+# Discussion
+
+TODO.
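
Note: the Tracer and Autodiff sections in the patch above describe a tape: record every forward op, then replay the records in reverse to produce gradients. Below is a minimal, self-contained Python sketch of that idea for reviewers; `Tape` and `TracedVar` are names invented here for illustration, not part of the proposed Paddle API.

```python
import numpy as np

class Tape:
    """Records (backward_fn, output) pairs during the forward pass."""
    def __init__(self):
        self.entries = []

class TracedVar:
    def __init__(self, value, tape):
        self.value = np.asarray(value, dtype=np.float32)
        self.grad = np.zeros_like(self.value)
        self.tape = tape

    def __mul__(self, other):
        out = TracedVar(self.value * other.value, self.tape)
        def backward(dout):
            # d(x*y)/dx = y, d(x*y)/dy = x
            self.grad += dout * other.value
            other.grad += dout * self.value
        self.tape.entries.append((backward, out))
        return out

def run_backward(root):
    """Seed the root gradient with ones, then replay the tape in reverse."""
    root.grad = np.ones_like(root.value)
    for fn, out in reversed(root.tape.entries):
        fn(out.grad)

tape = Tape()
x = TracedVar([2.0, 3.0], tape)
y = x * x          # forward op is recorded on the tape
run_backward(y)    # reverse pass replays the records
print(x.grad)      # [4. 6.], i.e. 2 * x
```

The design above records in C++ (`Tracer::Trace`) rather than in Python, but the control flow is the same: trace forward, walk the trace backward.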
From f997109bb1486d3aa9cfb027729d9a9c02340382 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 10:18:08 +0800
Subject: [PATCH 2/5] polish

---
 paddle/fluid/imperative/README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index 294c64b36a..89543da752 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -21,8 +21,7 @@ class Layer(object):
     # build some parameters once.
     # ...
-    return self.apply(inputs)
-
-  def apply(self, inputs):
+    return self.forward(inputs)
+  def forward(self, inputs):
     # forward logic with paddle operators. backward auto-generated.
 
 
@@ -35,7 +34,8 @@ class PyLayer(core.PyLayer):
   def forward(inputs):
     # any forward logic implemented with numpy io.
 
-  @static method
+  @staticmethod
+  def backward(inputs):
     # any backward logic implemented with numpy io.
 ```
 
@@ -67,7 +67,6 @@ class Tracer {
 
 There is plenty of existing research:
 https://autodiff-workshop.github.io/
-
 
 ## Tests
 
@@ -131,6 +129,7 @@ class MLP(fluid.imperative.Layer):
     out._backward()
 ```
 
+
 # Plan
 
 2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
@@ -143,6 +142,7 @@ class MLP(fluid.imperative.Layer):
 
 12.1, 5 fulltime: can compile to a static graph and support more optimizations.
 
+
 # Discussion
 
 TODO.

From 783dbe9abbf72b3a5460ee44f057b39051294a52 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 10:37:42 +0800
Subject: [PATCH 3/5] more doc

test=develop
---
 paddle/fluid/imperative/README.md | 51 +++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index 89543da752..adabb5b0a5 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -1,6 +1,6 @@
 # Overview
 
-Imperative Programming
+Imperative programming is easier to learn, debug, and use to try new ideas.
 
 # Related Work
 
@@ -37,12 +37,38 @@ class PyLayer(core.PyLayer):
   @staticmethod
   def backward(inputs):
     # any backward logic implemented with numpy io.
+
+
+
 ```
 
 
 ## Tracer
 
-Python Variable -> C++ VarBase -> C++ Variable -> C++ Tensor
+Current: Python Variable -> C++ VarBase -> C++ Variable -> C++ Tensor
+
+Longer term:
+```python
+
+# Parent class.
+class PyVarBase(object):
+  pass
+
+# Current python variable.
+class Variable(PyVarBase):
+  pass
+
+class IVariable(PyVarBase):
+  def __init__(self):
+    self._ivar = core.VarBase()
+
+  def to(self, device): pass
+  def value(self): pass
+  def backward(self): pass
+  def gradient_value(self): pass
+  # operators to override.
+```
 
 
 ```cpp
@@ -62,10 +88,21 @@ class Tracer {
 };
 ```
 
+* Trace forward operations.
+* Perform simple Python-level infer and return to user.
+* Perform autograd to generate gradients.
+* Clear trace.
+* Apply gradients with optimizers.
+
 ## Autodiff
 
 There is plenty of existing research:
 https://autodiff-workshop.github.io/
+https://en.wikipedia.org/wiki/Automatic_differentiation
+
+## Execution Engine
+
+Lazy execution of pushed C++ operations.
 
 ## Tests
 
@@ -76,7 +113,6 @@ https://autodiff-workshop.github.io/
 
 * All function layers with parameters converted to class Layers.
 * Models converted to imperative mode.
-
 
 # Examples
 
@@ -125,6 +161,10 @@ class MLP(fluid.imperative.Layer):
     out._backward()
 ```
 
 
+## Save/Load Models
+
+TODO
+
 # Plan
 
@@ -135,11 +175,11 @@ class MLP(fluid.imperative.Layer):
 
 4.1, 4 fulltime: can run 6 models at 70% of PyTorch performance. Release alpha.
 
 6.1, 5 fulltime: performance close to PyTorch, can run on multiple devices. Release beta.
 
-8.1, 5 fulltime: works in general. Convert current models to imperative mode.
-
-12.1, 5 fulltime: can compile to a static graph and support more optimizations.
+8.1, 5 fulltime: works in general. Update existing models. Can compile to a static graph and support more optimizations.
+
+12.1: done.
 
 
 # Discussion
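
Note: the Execution Engine section added above is a single line, "Lazy execution of pushed C++ operations." One way to read it, sketched under that assumption: ops are pushed onto a queue and nothing runs until a result is actually needed. `LazyEngine` and its methods are invented names for illustration, not the proposed engine.

```python
import numpy as np

class LazyEngine:
    def __init__(self):
        self.pending = []          # pushed but not yet executed ops

    def push(self, fn, *args):
        result = {}                # placeholder, filled in on flush
        self.pending.append((fn, args, result))
        return result

    def flush(self):
        # Execute every queued op in push order, resolving placeholders.
        for fn, args, result in self.pending:
            vals = [a["value"] if isinstance(a, dict) else a for a in args]
            result["value"] = fn(*vals)
        self.pending.clear()

engine = LazyEngine()
a = engine.push(np.add, np.ones(2), np.ones(2))   # nothing runs yet
b = engine.push(np.multiply, a, 3.0)              # still nothing
engine.flush()                                    # both ops run now
print(b["value"])                                 # [6. 6.]
```

Queueing ops before running them is what gives an engine room to fuse, reorder, or batch kernels; eager per-op dispatch would give up that freedom.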
From d7b159355c02b336895531ea2b8a439727d988bf Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 15:46:06 +0800
Subject: [PATCH 4/5] add more doc

test=develop
---
 paddle/fluid/imperative/README.md | 33 ++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index adabb5b0a5..c23a779048 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -38,8 +38,6 @@ class PyLayer(core.PyLayer):
   def backward(inputs):
     # any backward logic implemented with numpy io.
 
-
-
 ```
 
 
@@ -62,9 +60,13 @@ class IVariable(PyVarBase):
   def __init__(self):
     self._ivar = core.VarBase()
 
+  # Move var to a device.
   def to(self, device): pass
+  # Get var value.
   def value(self): pass
+  # Trigger backward.
   def backward(self): pass
+  # Get var's gradient value.
   def gradient_value(self): pass
   # operators to override.
 ```
@@ -100,18 +102,22 @@ ## Autodiff
 https://autodiff-workshop.github.io/
 https://en.wikipedia.org/wiki/Automatic_differentiation
 
-## Execution Engine
+Basically, trace the forward execution and perform autodiff
+when needed.
 
-Lazy execution of pushed C++ operations.
+* Can be triggered by `backward()`.
+* Can select a block of code to trace and autodiff.
+* Use `require_grad` to drop forward subgraphs that don't need autodiff.
 
-## Tests
+## Execution Engine
 
-* All op tests run once in static graph, once in imperative mode.
+Lazy execution of pushed C++ operations.
 
 ## Refactor
 
 * All function layers with parameters converted to class Layers.
-* Models converted to imperative mode.
+* Existing models converted to imperative mode.
+* All op tests run once in static graph, once in imperative mode.
 
 # Examples
@@ -137,7 +143,17 @@ class MyPyLayer(fluid.imperative.PyLayer):
     @staticmethod
     def backward(inputs):
         inp, out, dout = inputs
         return np.array(dout) * (1 - np.square(np.array(out)))
 
 
+np_inp = np.ones([2, 2], np.float32)
+with fluid.imperative.guard():
+    var_inp = fluid.imperative.base.to_variable(np_inp)
+    my_py_layer = MyPyLayer()
+    outs = my_py_layer(var_inp)
+    dy_out = np.sum(outs[0]._numpy())
+    outs[0]._backward()
+    dy_grad = var_inp._gradient()
+
+
 class MLP(fluid.imperative.Layer):
@@ -171,6 +187,10 @@ class MLP(fluid.imperative.Layer):
 ## Save/Load Models
 
 TODO
 
+## I/O
+
+TODO
+
 # Plan
 
 2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
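
Note: patch 4 says `require_grad` is used to drop forward subgraphs that don't need autodiff. A minimal sketch of that pruning rule, reusing the tape idea from the earlier note; `Var`, `mul`, and the module-level `tape` are illustrative only, not the proposed API.

```python
import numpy as np

class Var:
    def __init__(self, value, require_grad=False):
        self.value = np.asarray(value, dtype=np.float32)
        self.require_grad = require_grad
        self.grad = np.zeros_like(self.value)

tape = []

def mul(a, b):
    out = Var(a.value * b.value,
              require_grad=a.require_grad or b.require_grad)
    if out.require_grad:
        # Record the op only when some input needs gradients.
        def backward(dout):
            if a.require_grad:
                a.grad += dout * b.value
            if b.require_grad:
                b.grad += dout * a.value
        tape.append((backward, out))
    return out

x = Var([2.0], require_grad=True)
c = Var([5.0])             # constant subgraph: no autodiff needed
y = mul(mul(c, c), x)      # mul(c, c) is never recorded on the tape
y.grad = np.ones_like(y.value)
for fn, out in reversed(tape):
    fn(out.grad)
print(len(tape), x.grad)   # 1 [25.]; d(25*x)/dx = 25
```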
From 6b762f65192ae8a3c35a9a01a1719c3e9402225f Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Tue, 15 Jan 2019 15:52:39 +0800
Subject: [PATCH 5/5] add doc

test=develop
---
 paddle/fluid/imperative/README.md | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index c23a779048..4c4d619b35 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -91,7 +91,7 @@ class Tracer {
 ```
 
 * Trace forward operations.
-* Perform simple Python-level infer and return to user.
+* Perform quick shape/type inference, push kernels to the execution engine, and return to the user.
 * Perform autograd to generate gradients.
 * Clear trace.
 * Apply gradients with optimizers.
@@ -113,6 +113,20 @@ when needed.
 ## Execution Engine
 
 Lazy execution of pushed C++ operations.
 
+## Device Placement
+
+* An operator executes on its inputs' device.
+* All inputs should live on the same device.
+* Use `Var.to()` to explicitly move a var to a device.
+
+## Save/Load Models
+
+TODO
+
+## I/O
+
+TODO
+
 ## Refactor
@@ -181,15 +195,6 @@ class MLP(fluid.imperative.Layer):
     out._backward()
 ```
 
-
-## Save/Load Models
-
-TODO
-
-## I/O
-
-TODO
-
 # Plan
 
 2.1, 3 fulltime: can run a few simple models. (Currently: 2 engineers at 20% time.)
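
Note: the Device Placement rules in the last patch (an op runs on its inputs' device, all inputs must agree, `Var.to()` moves a var) can be shown with a small sketch. `DeviceVar` and `elementwise_add` are stand-ins invented here, not Paddle APIs, and the device strings are hypothetical.

```python
import numpy as np

class DeviceVar:
    def __init__(self, value, device="cpu"):
        self.value = np.asarray(value, dtype=np.float32)
        self.device = device

    def to(self, device):
        # A real framework would copy the tensor across devices here.
        return DeviceVar(self.value.copy(), device)

def elementwise_add(a, b):
    # Rule 2: all inputs should live on the same device.
    if a.device != b.device:
        raise ValueError(
            "inputs on different devices: %s vs %s" % (a.device, b.device))
    # Rule 1: the op executes on its inputs' device.
    return DeviceVar(a.value + b.value, a.device)

x = DeviceVar([1.0, 2.0], device="cpu")
y = DeviceVar([3.0, 4.0], device="gpu:0")
z = elementwise_add(x, y.to("cpu"))   # Rule 3: explicit move, as with Var.to()
print(z.value, z.device)              # [4. 6.] cpu
```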