From 14f8370738236fdd0de2e5f6c6bbf9c6d2d23e6a Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 22 Feb 2018 11:13:23 +0800
Subject: [PATCH 1/5] Add block.fwd_block_id

---
 paddle/fluid/framework/block_desc.cc   | 38 ++++++++++++++++++++------
 paddle/fluid/framework/block_desc.h    |  8 +++++-
 paddle/fluid/framework/framework.proto |  1 +
 paddle/fluid/framework/program_desc.h  |  8 +++++-
 paddle/fluid/operators/while_op.cc     |  7 +++--
 paddle/fluid/pybind/protobuf.cc        |  2 ++
 python/paddle/v2/fluid/backward.py     |  5 +++-
 python/paddle/v2/fluid/framework.py    | 26 +++++++++++++++---
 8 files changed, 78 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc
index 0dd37e7df0..996aefd047 100644
--- a/paddle/fluid/framework/block_desc.cc
+++ b/paddle/fluid/framework/block_desc.cc
@@ -46,11 +46,25 @@ VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const {
   if (name == kEmptyVarName) return nullptr;
 
   auto it = vars_.find(name);
-  if (it == vars_.end()) {
-    return Parent() == kNoneBlockIndex ? nullptr
-                                       : ParentBlock()->FindVarRecursive(name);
+  if (it != vars_.end()) {
+    return it->second.get();
   }
-  return it->second.get();
+
+  BlockDesc *tmp = ParentBlock();
+
+  if (tmp != nullptr) {
+    auto ptr = tmp->FindVarRecursive(name);
+    if (ptr != nullptr) {
+      return ptr;
+    }
+  }
+
+  tmp = ForwardBlock();
+  if (tmp != nullptr) {
+    return tmp->FindVarRecursive(name);
+  }
+
+  return nullptr;
 }
 
 VarDesc &BlockDesc::FindRecursiveOrCreateVar(const std::string &name_bytes) {
@@ -136,10 +150,7 @@ void BlockDesc::Flush() {
 }
 
 BlockDesc *BlockDesc::ParentBlock() const {
-  if (this->desc_->parent_idx() == kNoneBlockIndex) {
-    return nullptr;
-  }
-  return prog_->MutableBlock(static_cast<size_t>(this->desc_->parent_idx()));
+  return prog_->MutableBlock(static_cast<size_t>(desc_->parent_idx()));
 }
 
 proto::BlockDesc *BlockDesc::Proto() {
@@ -186,5 +197,16 @@ void BlockDesc::ClearPBVars() {
   }
 }
 
+void BlockDesc::SetForwardBlockID(int32_t forward_block_id) {
+  PADDLE_ENFORCE(!desc_->has_forward_block_idx(),
+                 "Forward block ID has been set to %d. Cannot set to %d",
+                 desc_->forward_block_idx(), forward_block_id);
+  desc_->set_forward_block_idx(forward_block_id);
+}
+
+BlockDesc *BlockDesc::ForwardBlock() const {
+  return prog_->MutableBlock(static_cast<size_t>(desc_->forward_block_idx()));
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h
index 4e2b03e245..8345934a71 100644
--- a/paddle/fluid/framework/block_desc.h
+++ b/paddle/fluid/framework/block_desc.h
@@ -49,6 +49,8 @@ class BlockDesc {
 
   int32_t Parent() const { return desc_->parent_idx(); }
 
+  int32_t ForwardBlockID() const { return desc_->forward_block_idx(); }
+
   VarDesc *Var(const std::string &name_bytes);
 
   VarDesc *FindVar(const std::string &name_bytes) const;
@@ -73,6 +75,10 @@ class BlockDesc {
 
   BlockDesc *ParentBlock() const;
 
+  BlockDesc *ForwardBlock() const;
+
+  void SetForwardBlockID(int32_t forward_block_id);
+
   OpDesc *AppendOp();
 
   void AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc);
@@ -91,7 +97,7 @@ class BlockDesc {
 
   proto::BlockDesc *Proto();
 
-  ProgramDesc *Program() { return this->prog_; }
+  ProgramDesc *Program() const { return this->prog_; }
 
  private:
   void ClearPBOps();
diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto
index 4eb18b4e4d..5b43f5a8a4 100644
--- a/paddle/fluid/framework/framework.proto
+++ b/paddle/fluid/framework/framework.proto
@@ -158,6 +158,7 @@ message BlockDesc {
   required int32 parent_idx = 2;
   repeated VarDesc vars = 3;
   repeated OpDesc ops = 4;
+  optional int32 forward_block_idx = 5 [ default = -1 ];
 }
 
 // Please refer to
diff --git a/paddle/fluid/framework/program_desc.h b/paddle/fluid/framework/program_desc.h
index 8d4b999ad2..538a037211 100644
--- a/paddle/fluid/framework/program_desc.h
+++ b/paddle/fluid/framework/program_desc.h
@@ -38,7 +38,13 @@ class ProgramDesc {
 
   BlockDesc *AppendBlock(const BlockDesc &parent);
 
-  BlockDesc *MutableBlock(size_t idx) { return blocks_[idx].get(); }
+  BlockDesc *MutableBlock(size_t idx) {
+    if (idx == static_cast<size_t>(kNoneBlockIndex)) {
+      return nullptr;
+    } else {
+      return blocks_[idx].get();
+    }
+  }
 
   const BlockDesc &Block(size_t idx) const { return *blocks_[idx]; }
 
diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc
index 3d5cdeda26..5f51a273dd 100644
--- a/paddle/fluid/operators/while_op.cc
+++ b/paddle/fluid/operators/while_op.cc
@@ -231,7 +231,8 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
     while_grad->SetInput(kStepScopes, Output(kStepScopes));
 
     auto *grad_block = this->grad_block_[0];
-    auto *fwd_block = grad_block->ParentBlock();
+    auto *fwd_block = grad_block->ForwardBlock();
+    auto *parent_block = grad_block->ParentBlock();
 
     // Not all of IGs will be generated by inner gradient operators of while op.
     // Ignore IGs that is not generated by the inside block.
@@ -265,8 +266,10 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
       for (auto &input_name : op->InputArgumentNames()) {
         // If the input of Op has been recorded or is generated by the forward
         // block, do not make it as input again.
+
         if (block_ins.find(input_name) != block_ins.end() ||
-            fwd_block->FindVar(input_name) != nullptr) {
+            fwd_block->FindVar(input_name) != nullptr ||
+            parent_block->FindVar(input_name) != nullptr) {
           continue;
         }
         extra_inputs.insert(input_name);
diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
index 131971099e..01dc53de78 100644
--- a/paddle/fluid/pybind/protobuf.cc
+++ b/paddle/fluid/pybind/protobuf.cc
@@ -155,6 +155,8 @@ void BindBlockDesc(py::module &m) {
   py::class_<BlockDesc>(m, "BlockDesc", "")
       .def_property_readonly("id", &BlockDesc::ID)
       .def_property_readonly("parent", &BlockDesc::Parent)
+      .def("get_forward_block_idx", &BlockDesc::ForwardBlockID)
+      .def("set_forward_block_idx", &BlockDesc::SetForwardBlockID)
       .def("append_op", &BlockDesc::AppendOp,
           py::return_value_policy::reference)
      .def("prepend_op", &BlockDesc::PrependOp,
diff --git a/python/paddle/v2/fluid/backward.py b/python/paddle/v2/fluid/backward.py
index 33ff43f693..ba27aaa246 100644
--- a/python/paddle/v2/fluid/backward.py
+++ b/python/paddle/v2/fluid/backward.py
@@ -298,7 +298,8 @@ def _append_backward_ops_(block,
         # If the op has its own sub-block, deal with the sub-block first
         if op.has_attr("sub_block"):
             sub_block = program.block(op.block_attr("sub_block"))
-            grad_sub_block = program.create_block(parent_idx=sub_block.idx)
+            grad_sub_block = program.create_block()
+            grad_sub_block.set_forward_block_idx(sub_block.idx)
             cb = _callback_lookup_(op)
             if cb is not None:
                 if callbacks is None:
@@ -310,6 +311,8 @@ def _append_backward_ops_(block,
             else:
                 _append_backward_ops_(sub_block, sub_block.ops, grad_sub_block,
                                       no_grad_dict, grad_to_var, callbacks)
+
+            program.rollback()
             grad_sub_block_list.append(grad_sub_block.desc)
 
         # Getting op's corresponding grad_op
diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index 0e11709296..7ec04013c9 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -678,6 +678,13 @@ class Block(object):
     def parent_idx(self):
         return self.desc.parent
 
+    @property
+    def forward_block_idx(self):
+        return self.desc.get_forward_block_idx()
+
+    def set_forward_block_idx(self, idx):
+        self.desc.set_forward_block_idx(idx)
+
     @property
     def idx(self):
         return self.desc.id
@@ -695,11 +702,22 @@ class Block(object):
             return self.var(name)
         else:
             if self.idx == 0:
-                raise ValueError("var %s is not in block(%d) nor its parents." %
-                                 name, self.idx)
+                raise ValueError(
+                    "var {0} is not in block({1}) nor its parents.".format(
+                        name, self.idx))
             else:
-                parent_block = self.program.block(self.parent_idx)
-                return parent_block.var_recursive(name)
+                # DFS
+                try:
+                    parent_block = self.program.block(self.parent_idx)
+                    return parent_block.var_recursive(name)
+                except ValueError:
+                    fwd_block = self.program.block(
+                        self.forward_block_idx
+                    ) if self.forward_block_idx != -1 else None
+                    if fwd_block is not None:
+                        return fwd_block.var_recursive(name)
+                    else:
+                        raise
 
     def all_parameters(self):
         return list(self.iter_parameters())

From bf92706c58f8c89db9b670523e8aa4fcd2c067a7 Mon Sep 17 00:00:00 2001
From: qijun
Date: Fri, 23 Feb 2018 11:40:30 +0800
Subject: [PATCH 2/5] fix bug in memory optimization transpiler

---
 .../fluid/memory_optimization_transpiler.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/paddle/v2/fluid/memory_optimization_transpiler.py b/python/paddle/v2/fluid/memory_optimization_transpiler.py
index ee56ccdcf1..6952ca7fe4 100644
--- a/python/paddle/v2/fluid/memory_optimization_transpiler.py
+++ b/python/paddle/v2/fluid/memory_optimization_transpiler.py
@@ -223,15 +223,15 @@ def get_cfgs(input_program):
 
     # Find while/while_grad block pair
     for grad_id in while_grad_sub_block_ids:
-        parent_id = pdesc.block(grad_id).parent
-        if parent_id in while_sub_block_ids:
-            while_block_id_pair.append((parent_id, grad_id))
-            while_sub_block_ids.remove(parent_id)
+        forward_id = pdesc.block(grad_id).get_forward_block_idx()
+        if forward_id in while_sub_block_ids:
+            while_block_id_pair.append((forward_id, grad_id))
+            while_sub_block_ids.remove(forward_id)
 
     # Get while/while_grad block ops
-    for parent_id, grad_id in while_block_id_pair:
+    for forward_id, grad_id in while_block_id_pair:
         while_block_ops = []
-        while_block = pdesc.block(parent_id)
+        while_block = pdesc.block(forward_id)
         while_block_op_size = while_block.op_size()
         for i in range(while_block_op_size):
             while_block_ops.append(while_block.op(i))
@@ -242,21 +242,21 @@ def get_cfgs(input_program):
             while_block_ops.append(while_grad_block.op(i))
 
         while_op_output = set()
-        while_op_output.update(while_op_dict[parent_id].output_arg_names())
+        while_op_output.update(while_op_dict[forward_id].output_arg_names())
         while_op_output.update(while_op_dict[grad_id].output_arg_names())
 
         ops_list.append((while_block_ops, while_block_op_size, while_op_output))
 
     # Process rest while block ops
-    for parent_id in while_sub_block_ids:
+    for forward_id in while_sub_block_ids:
         while_block_ops = []
-        while_block = pdesc.block(parent_id)
+        while_block = pdesc.block(forward_id)
         while_block_op_size = while_block.op_size()
         for i in range(while_block_op_size):
             while_block_ops.append(while_block.op(i))
 
         while_op_output = set()
-        while_op_output.update(while_op_dict[parent_id].output_arg_names())
+        while_op_output.update(while_op_dict[forward_id].output_arg_names())
 
         ops_list.append((while_block_ops, while_block_op_size, while_op_output))

From 65058cfb7ac07204cbd2dcdc05e845a447fc54f8 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Fri, 23 Feb 2018 12:58:32 +0800
Subject: [PATCH 3/5] Change DFS to BFS

---
 paddle/fluid/framework/block_desc.cc | 38 +++++++++++++++--------
 python/paddle/v2/fluid/framework.py  | 46 ++++++++++++++++------------
 2 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc
index 996aefd047..1efb775cdc 100644
--- a/paddle/fluid/framework/block_desc.cc
+++ b/paddle/fluid/framework/block_desc.cc
@@ -16,6 +16,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 
+#include <queue>
+
 namespace paddle {
 namespace framework {
 
@@ -45,23 +47,33 @@ bool BlockDesc::HasVar(const std::string &name) const {
 VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const {
   if (name == kEmptyVarName) return nullptr;
 
-  auto it = vars_.find(name);
-  if (it != vars_.end()) {
-    return it->second.get();
-  }
+  std::queue<const BlockDesc *> frontier;
+  std::unordered_set<const BlockDesc *> visited;
 
-  BlockDesc *tmp = ParentBlock();
+  frontier.push(this);
 
-  if (tmp != nullptr) {
-    auto ptr = tmp->FindVarRecursive(name);
-    if (ptr != nullptr) {
-      return ptr;
+  while (!frontier.empty()) {  // BFS
+    auto cur = frontier.front();
+    frontier.pop();
+    if (visited.count(cur) != 0) {
+      continue;
+    }
+    auto var = cur->FindVar(name);
+    if (var != nullptr) {
+      return var;
+    }
+
+    auto fwd = cur->ForwardBlock();
+    auto parent = cur->ParentBlock();
+
+    if (fwd != nullptr) {
+      frontier.push(fwd);
+    }
+    if (parent != nullptr) {
+      frontier.push(parent);
     }
-  }
 
-  tmp = ForwardBlock();
-  if (tmp != nullptr) {
-    return tmp->FindVarRecursive(name);
+    visited.insert(cur);
   }
 
   return nullptr;
diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index 7ec04013c9..3ec8d97814 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -698,26 +698,32 @@ class Block(object):
         return v
 
     def var_recursive(self, name):
-        if self.has_var(name):
-            return self.var(name)
-        else:
-            if self.idx == 0:
-                raise ValueError(
-                    "var {0} is not in block({1}) nor its parents.".format(
-                        name, self.idx))
-            else:
-                # DFS
-                try:
-                    parent_block = self.program.block(self.parent_idx)
-                    return parent_block.var_recursive(name)
-                except ValueError:
-                    fwd_block = self.program.block(
-                        self.forward_block_idx
-                    ) if self.forward_block_idx != -1 else None
-                    if fwd_block is not None:
-                        return fwd_block.var_recursive(name)
-                    else:
-                        raise
+        frontier = list()
+        visited = set()
+
+        frontier.append(self)
+
+        prog = self.program
+
+        while len(frontier) != 0:  # BFS
+            cur = frontier[0]
+            frontier = frontier[1:]
+
+            if id(cur) in visited:
+                continue
+
+            if cur.has_var(name):
+                return cur.var(name)
+
+            if cur.parent_idx != -1:
+                frontier.append(prog.block(cur.parent_idx))
+
+            if cur.forward_block_idx != -1:
+                frontier.append(prog.block(cur.forward_block_idx))
+
+            visited.add(id(cur))
+
+        raise ValueError("Var {0} is not found recursively".format(name))
 
     def all_parameters(self):
         return list(self.iter_parameters())

From 574bcdab42c8db34f0f082ffba69aacbea36c36d Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Fri, 23 Feb 2018 13:13:38 +0800
Subject: [PATCH 4/5] Add comments

---
 paddle/fluid/operators/while_op.cc | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc
index 5f51a273dd..8b62b242cf 100644
--- a/paddle/fluid/operators/while_op.cc
+++ b/paddle/fluid/operators/while_op.cc
@@ -261,35 +261,37 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
     for (auto &o : Output(kOutputs)) {
       block_ins.insert(o);
     }
-    std::unordered_set<std::string> extra_inputs;
+    std::unordered_set<std::string> output_grads;
     for (const auto *op : grad_block->AllOps()) {
      for (auto &input_name : op->InputArgumentNames()) {
         // If the input of Op has been recorded or is generated by the forward
         // block, do not make it as input again.
+        // The input is located in I/O or other op's outputs or the variable is
+        // located in grad_block's parents
         if (block_ins.find(input_name) != block_ins.end() ||
-            fwd_block->FindVar(input_name) != nullptr ||
-            parent_block->FindVar(input_name) != nullptr) {
+            (fwd_block->FindVarRecursive(input_name) != nullptr ||
+             parent_block->FindVarRecursive(input_name) != nullptr)) {
           continue;
         }
-        extra_inputs.insert(input_name);
+        output_grads.insert(input_name);
       }
       for (auto &output_name : op->OutputArgumentNames()) {
         block_ins.insert(output_name);
       }
     }
 
-    std::vector<std::string> extra_inputs_list;
-    extra_inputs_list.resize(extra_inputs.size());
-    std::copy(extra_inputs.begin(), extra_inputs.end(),
-              extra_inputs_list.begin());
-    while_grad->SetInput(framework::GradVarName(kOutputs), extra_inputs_list);
+    std::vector<std::string> output_grads_list;
+    output_grads_list.resize(output_grads.size());
+    std::copy(output_grads.begin(), output_grads.end(),
+              output_grads_list.begin());
+    while_grad->SetInput(framework::GradVarName(kOutputs), output_grads_list);
 
     while_grad->SetAttrMap(this->Attrs());
     while_grad->SetBlockAttr(kStepBlock, *grad_block);
     // record the original output gradient names, since the gradient name of
     // while operator could be renamed.
-    while_grad->SetAttr("original_output_grad", extra_inputs_list);
+    while_grad->SetAttr("original_output_grad", output_grads_list);
 
     return std::unique_ptr<framework::OpDesc>(while_grad);
   }

From 71053063a105bf64e08ae5826019c05cb7639b3b Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Fri, 23 Feb 2018 14:32:35 +0800
Subject: [PATCH 5/5] test Parallel.Do and DynRNN

---
 python/paddle/v2/fluid/layers/control_flow.py |  3 +-
 .../tests/book/test_understand_sentiment.py   | 57 +++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index b9ab28a86a..72056cc7cd 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -652,7 +652,8 @@ class While(object):
         parent_block.append_op(
             type='while',
             inputs={
-                'X': [parent_block.var(x_name) for x_name in x_name_list],
+                'X':
+                [parent_block.var_recursive(x_name) for x_name in x_name_list],
                 'Condition': [self.cond_var]
             },
             outputs={'Out': out_vars,
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py
index af917de8e3..61f46b51c4 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py
@@ -47,6 +47,46 @@ def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
     return avg_cost, accuracy, prediction
 
 
+def dyn_rnn_lstm(data, label, input_dim, class_dim=2, emb_dim=32,
+                 lstm_size=128):
+    emb = fluid.layers.embedding(
+        input=data, size=[input_dim, emb_dim], is_sparse=True)
+    sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
+
+    rnn = fluid.layers.DynamicRNN()
+    with rnn.block():
+        word = rnn.step_input(sentence)
+        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
+        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
+
+        def gate_common(ipt, hidden, size):
+            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
+            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
+            return gate0 + gate1
+
+        forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
+                                                         lstm_size))
+        input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
+                                                        lstm_size))
+        output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
+                                                         lstm_size))
+        cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
+                                                       lstm_size))
+
+        cell = forget_gate * prev_cell + input_gate * cell_gate
+        hidden = output_gate * fluid.layers.tanh(x=cell)
+        rnn.update_memory(prev_cell, cell)
+        rnn.update_memory(prev_hidden, hidden)
+        rnn.output(hidden)
+
+    last = fluid.layers.sequence_last_step(rnn())
+    prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    accuracy = fluid.layers.accuracy(input=prediction, label=label)
+    return avg_cost, accuracy, prediction
+
+
 def stacked_lstm_net(data,
                      label,
                      input_dim,
@@ -270,6 +310,23 @@ class TestUnderstandSentiment(unittest.TestCase):
                 use_cuda=True,
                 parallel=True)
 
+    @unittest.skip(reason='make CI faster')
+    def test_dynrnn_lstm_gpu(self):
+        with self.new_program_scope():
+            main(
+                self.word_dict,
+                net_method=dyn_rnn_lstm,
+                use_cuda=True,
+                parallel=False)
+
+    def test_dynrnn_lstm_gpu_parallel(self):
+        with self.new_program_scope():
+            main(
+                self.word_dict,
+                net_method=dyn_rnn_lstm,
+                use_cuda=True,
+                parallel=True)
+
 
 if __name__ == '__main__':
     unittest.main()
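
Note appended by the editor (not part of the patch series above): the series links each while_grad sub-block back to its forward sub-block through the new forward_block_idx field, and PATCH 3 turns variable lookup into a BFS over both the parent and the forward links. The snippet below is a minimal, self-contained sketch of that lookup; ToyBlock and find_var_bfs are hypothetical stand-ins for illustration, not the real fluid.framework.Block or BlockDesc API.

    from collections import deque


    class ToyBlock(object):
        """Hypothetical stand-in for a fluid block: vars plus parent/forward links."""

        def __init__(self, idx, var_dict=None, parent=None, forward=None):
            self.idx = idx
            self.var_dict = var_dict or {}
            self.parent = parent    # enclosing (parent) block, or None
            self.forward = forward  # forward (non-gradient) block, or None


    def find_var_bfs(block, name):
        """BFS over parent and forward links, mirroring Block.var_recursive in PATCH 3."""
        frontier = deque([block])
        visited = set()
        while frontier:
            cur = frontier.popleft()
            if id(cur) in visited:
                continue
            if name in cur.var_dict:
                return cur.var_dict[name]
            if cur.parent is not None:
                frontier.append(cur.parent)
            if cur.forward is not None:
                frontier.append(cur.forward)
            visited.add(id(cur))
        raise ValueError("Var {0} is not found recursively".format(name))


    # A while sub-block (idx 1) and its grad block (idx 2): the grad block holds no
    # entry for "step", but reaches it through the forward link, and reaches "x"
    # through the shared parent.
    main_block = ToyBlock(0, {"x": "x_tensor"})
    fwd_block = ToyBlock(1, {"step": "step_tensor"}, parent=main_block)
    grad_block = ToyBlock(2, {}, parent=main_block, forward=fwd_block)
    print(find_var_bfs(grad_block, "step"))  # resolved via the forward block
    print(find_var_bfs(grad_block, "x"))     # resolved via the parent block

The sketch only reproduces the property the patches rely on: a gradient block with no local entry for a name can still resolve it through either its parent chain or the forward block recorded by set_forward_block_idx.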