From 56a722a1d01eb49bfbe5120065c615ecf1e16fe5 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 10 Jul 2017 14:22:18 +0800 Subject: [PATCH 001/170] output all beam search results in layer group. --- .../RecurrentGradientMachine.cpp | 104 ++++++++++++------ .../RecurrentGradientMachine.h | 7 +- paddle/parameter/Argument.cpp | 36 +++--- paddle/parameter/Argument.h | 1 + .../paddle/trainer_config_helpers/networks.py | 13 +-- 5 files changed, 102 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 41e0929959..4cb5b8ec2d 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() { /* width */ resultNum, false, /* useGpu */ false); - Matrix::resizeOrCreate(generator_.outArg.value, - /* height */ maxGenWordCount, - /* width */ 1, - false, - /* useGpu */ false); } ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions, numSequences + 1, @@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() { } else { oneWaySearch(numSequences); } - if (dataArgsSize_) createDataOutlink(batchMachineIdVec_); + if (dataArgsSize_) createDataOutlink(); size_t size = generator_.ids.size(); generator_.outArg.ids->resize(size); @@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { } batchMachineIdVec_.clear(); + batchMachineStartPos_.clear(); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); starts[0] = 0; generator_.ids.clear(); @@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() { finalPaths_[i].resize(minFinalPathsSize); } - batchMachineIdVec_.clear(); generator_.ids.clear(); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); starts[0] = 0; if (numResults > 1) { - real* probs = generator_.outArg.in->getData(); + int idsProbSaveSize = 0; + for (auto inSeq : finalPaths_) { + for (auto path : inSeq) idsProbSaveSize += path.ids.size(); + idsProbSaveSize += inSeq.size(); + } + Matrix::resizeOrCreate( + generator_.outArg.value, idsProbSaveSize, 1, false, false); real* idsProb = generator_.outArg.value->getData(); + + real* probs = generator_.outArg.in->getData(); size_t curPos = 0; for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) { @@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() { curPos += genLen; idsProb[curPos++] = -1.0; probs[i * numResults + j] = path.logProb; - - if (!j && dataArgsSize_) { - // in beam search, here only reserved the top 1 generated result - // for out_links that are not the generated word indices. 
- batchMachineIdVec_.insert(batchMachineIdVec_.end(), - path.machineIdVec.begin(), - path.machineIdVec.end()); - } } starts[i + 1] = generator_.ids.size(); } } else { for (size_t i = 0; i < finalPaths_.size(); ++i) { CHECK(!finalPaths_[i].empty()); - generator_.ids.insert(generator_.ids.begin(), - finalPaths_[i][0].ids.begin(), - finalPaths_[i][0].ids.end()); - starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size(); + Path& path = finalPaths_[i][0]; + generator_.ids.insert( + generator_.ids.begin(), path.ids.begin(), path.ids.end()); + starts[i + 1] = starts[i] + path.ids.size(); } } } @@ -1364,25 +1359,70 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) { } } -void RecurrentGradientMachine::createDataOutlink( - std::vector& machineIdVec) { - size_t seqNum = - getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size(); - std::vector starts(seqNum + 1, 0); - for (size_t i = 0; i < seqNum; ++i) { - size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() - : finalPaths_[0][i].ids.size(); - starts[i + 1] = starts[i] + seqLen; +void RecurrentGradientMachine::createDataOutlinkSelRowsInfo( + bool isSeq, std::vector& outArgs) { + batchMachineIdVec_.clear(); + + size_t seqIdx = 0; + for (size_t i = 0; i < finalPaths_.size(); ++i) { + for (size_t j = 0; j < finalPaths_[i].size(); ++j) { + std::vector& machineIdVec = finalPaths_[i][j].machineIdVec; + if (isSeq) { + for (size_t i = 0; i < machineIdVec.size(); ++i) { + size_t rowId = machineIdVec[i]; + int* seqPos = + outArgs[i].sequenceStartPositions->getMutableData(false); + batchMachineIdVec_.push_back(seqPos[rowId]); + } + } else { + batchMachineIdVec_.insert( + batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end()); + } + seqIdx++; + } + } +} + +void RecurrentGradientMachine::createDataOutlinkCopySizeInfo( + bool isSeq, std::vector& outArgs, std::vector& copySize) { + size_t totalSeqNum = std::accumulate( + finalPaths_.begin(), + finalPaths_.end(), + 0UL, + [](size_t a, const std::vector& b) { return a + b.size(); }); + copySize.resize(totalSeqNum, 1); + + batchMachineStartPos_.resize(totalSeqNum + 1, 0); + if (isSeq) { + ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions; + CHECK_EQ(inputSeqStartPos->getSize() - 1, finalPaths_.size()); + int* starts = inputSeqStartPos->getMutableData(false); + int seqId = 0; + for (int i = 0; i < finalPaths_.size(); ++i) { + for (int j = 0; j < finalPaths_[i].size(); ++j) { + copySize[seqId] = starts[i + 1] - starts[i]; + batchMachineStartPos_[seqId + 1] = + batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size(); + seqId++; + } + } } +} +void RecurrentGradientMachine::createDataOutlink() { for (size_t i = 0; i < dataArgsSize_; i++) { + bool isSeq = dataArgsFrame_[i][0].hasSeq(); + std::vector copySize; + createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize); + createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]); + dataArgs_[i].concat(dataArgsFrame_[i], - machineIdVec, - starts, + batchMachineIdVec_, + batchMachineStartPos_, + copySize, useGpu_, HPPL_STREAM_1, PASS_TEST); - auto dataAgent = dynamic_cast(outFrameLines_[i + 1].agentLayer.get()); CHECK_NOTNULL(dataAgent); diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index fb3fc5877a..bd096770b7 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -480,7 +480,11 @@ private: * @param machineIdVec : 
select a row of output matrix in each frame * that the generation process expanded. */ - void createDataOutlink(std::vector& machineIdVec); + void createDataOutlink(); + void createDataOutlinkCopySizeInfo(bool isSeq, + std::vector& outArgs, + std::vector& copySize); + void createDataOutlinkSelRowsInfo(bool isSeq, std::vector& outArgs); /* * @brief used in beam search, connect previous frame to form recurrent link @@ -543,6 +547,7 @@ private: std::vector topIds_; std::vector seqIds_; std::vector batchMachineIdVec_; + std::vector batchMachineStartPos_; std::vector> finalPaths_; std::vector minFinalPathLogProb_; BeamSearchControlCallbacks* beamSearchCtrlCallbacks_; diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index ef72b973c1..e7522def08 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, void Argument::concat(const std::vector& args, const std::vector& selectRows, const std::vector& seqStartPos, + const std::vector& copySize, bool useGpu, hl_stream_t stream, PassType passType) { CHECK(!subSequenceStartPositions) << "undefined behavior for subsequence positions"; - size_t batchSize = selectRows.size(); + size_t batchSize = 0; + for (size_t i = 0; i < copySize.size(); ++i) + batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]); + auto copyArg = [batchSize, stream](MatrixPtr& dst, MatrixPtr src, - int startRow, - int pos, + int desStartRow, + int srcStartRow, int size, bool useGpu) { if (!src) { @@ -300,8 +304,8 @@ void Argument::concat(const std::vector& args, dst->resize(batchSize, width); } - MatrixPtr tmpMatrix = dst->subMatrix(startRow, size); - tmpMatrix->copyFrom(*src->subMatrix(pos, size), stream); + MatrixPtr tmpMatrix = dst->subMatrix(desStartRow, size); + tmpMatrix->copyFrom(*src->subMatrix(srcStartRow, size), stream); }; auto copyIds = [batchSize, stream](IVectorPtr& dst, @@ -339,24 +343,24 @@ void Argument::concat(const std::vector& args, dataId = args[0].dataId; CHECK_NE(seqStartPos.size(), 0UL); - size_t sampleNum = seqStartPos.size() - 1; - for (size_t i = 0; i < sampleNum; ++i) { + int desStartRow = 0; + for (size_t i = 0; i < copySize.size(); ++i) { int startPos = seqStartPos[i]; int endPos = seqStartPos[i + 1]; CHECK_GE(args.size(), static_cast(endPos - startPos)); for (int j = startPos; j < endPos; ++j) { const Argument& arg = args[j - startPos]; - CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have" - << " same dataId"; - const int copySize = 1; - const int rowIdx = selectRows[j]; - copyArg(in, arg.in, j, rowIdx, copySize, useGpu); - copyArg(value, arg.value, j, rowIdx, copySize, useGpu); + CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have the " + << "same dataId"; + const int srcStartRow = selectRows[j]; + copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu); + copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu); if (passType != PASS_TEST) { - copyArg(grad, arg.grad, j, rowIdx, copySize, useGpu); + copyArg(grad, arg.grad, desStartRow, srcStartRow, copySize[i], useGpu); } - copyIds(ids, arg.ids, j, rowIdx, copySize, useGpu); - copyStrs(strs, arg.strs, j, rowIdx, copySize, useGpu); + copyIds(ids, arg.ids, desStartRow, srcStartRow, copySize[i], useGpu); + copyStrs(strs, arg.strs, desStartRow, srcStartRow, copySize[i], useGpu); + desStartRow += copySize[i]; } } ICpuGpuVector::resizeOrCreate( diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 
0ccdef802e..be87175658 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -240,6 +240,7 @@ struct Argument { void concat(const std::vector& args, const std::vector& selectRows, const std::vector& seqStartPos, + const std::vector& copySize, bool useGpu, hl_stream_t stream, PassType passType); diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index b77932ce5f..c0b2ced234 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1370,14 +1370,7 @@ def simple_attention(encoded_sequence, param_attr=softmax_param_attr, name="%s_softmax" % name, bias_attr=False) - - scaled = scaling_layer( - weight=attention_weight, - input=encoded_sequence, - name='%s_scaling' % name) - - return pooling_layer( - input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) + return attention_weight def inputs(layers, *args): @@ -1395,7 +1388,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(* [l.name for l in layers]) + Inputs(*[l.name for l in layers]) def outputs(layers, *args): @@ -1438,7 +1431,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(* [l.name for l in layers]) + Outputs(*[l.name for l in layers]) return # just return outputs. if len(layers) != 1: From 4c134c7c7d201a9f28449974d489111b51c6f6fb Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 14 Jul 2017 17:21:36 +0800 Subject: [PATCH 002/170] add comments. --- .../RecurrentGradientMachine.h | 38 ++++++++++++++++--- paddle/parameter/Argument.cpp | 4 +- .../paddle/trainer_config_helpers/networks.py | 4 +- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index a3d04b207c..cc0eda9f13 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -190,7 +190,7 @@ public: std::vector ids; /** - * @brief idsProb, log probability of each generated words. + * @brief idsProb, log probability of each generated word. */ std::vector idsProb; @@ -472,16 +472,42 @@ private: void copyDataOutlinkFrame(size_t machineCur); /* - * @brief In generation, if the layer group has more than 1 outlink, outlinks - * except the first one are data outlinks. This function creates the data - * outlinks. - * @note In beam search, only one generated sequence with the hightest log - * probabilites are retained. + * @brief In generation, if the layer group has more than 1 outlink, outlink + * except the first one is a data outlink. In RecurrentLayerGroup, each time + * step is a separate Network, outputs of a layer inside the + * RecurrentLayerGroup are stored in separate Arguments. If one layer is + * specified as an outlink of RecurrentLayerGroup. This function will + * collect outputs in each time step of each generated sequence which are + * dispersed in separate Arguments to form a new single Argument as output of + * RecurrentLayerGroup. */ void createDataOutlink(); + + /* + * @brief decide to select how many rows from the Matrix stored the forward + * pass results from a start position. + * + * @param isSeq: a flag indicating whetehr the layer to be output of the + * RecurrentGradientMachine is a sequence or not + * @param outArgs: all of the the returned Arguments of the forward pass + * during the generation process. 
+ * @param copySize: the returned result, number of rows to select from the + * Matrix stored the forward pass results from a start position. + */ void createDataOutlinkCopySizeInfo(bool isSeq, std::vector& outArgs, std::vector& copySize); + + /* + * @brief decide index of the start row for each time step of a generated + * sequence in Matrix stored the entire beam search batch's forward pass + * results. + * + * @param isSeq: a flag indicating whetehr the layer to be output of the + * RecurrentGradientMachine is a sequence or not + * @param outArgs: all of the the returned Arguments of the forward pass + * during the generation process. + */ void createDataOutlinkSelRowsInfo(bool isSeq, std::vector& outArgs); /* diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index f45a51d7b1..9a9092af9b 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -352,8 +352,8 @@ void Argument::concat(const std::vector& args, CHECK_GE(args.size(), static_cast(endPos - startPos)); for (int j = startPos; j < endPos; ++j) { const Argument& arg = args[j - startPos]; - CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have the " - << "same dataId"; + CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have " + << "the same dataId."; const int srcStartRow = selectRows[j]; copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu); copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu); diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 30c826ffc8..810bea913e 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1375,9 +1375,9 @@ def simple_attention(encoded_sequence, weight=attention_weight, input=encoded_sequence, name='%s_scaling' % name) + return pooling_layer( - input=scaled, pooling_type=SumPooling(), - name="%s_pooling" % name), attention_weight + input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) def inputs(layers, *args): From 9d569c5a38582cbf9022578c046f89a88697c493 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 3 Aug 2017 17:57:00 -0700 Subject: [PATCH 003/170] Update Backward.md Add the "Backward Operator Registry" section --- paddle/framework/backward.md | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 74c001b06a..61f308b469 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -1,8 +1,28 @@ -## Operator/expression 's Backward +# Operator/expression 's Backward -### Motivation +## Motivation In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/ expression's Backward feature will generate the backward pass respect to forward pass. + +## Backward Operator Registry + +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients, and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. 
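+
+Conceptually, this gradient registry can be pictured as a global table that maps a forward operator's type to a creator of its backward operator. The sketch below only illustrates that idea; the struct and member names are invented for illustration and are not the actual PaddlePaddle API:
+
+```cpp
+#include <functional>
+#include <string>
+#include <unordered_map>
+
+class OperatorBase;  // stands in for the real operator base class
+
+// Illustrative sketch: a singleton map keyed by the forward operator type,
+// storing a factory that builds the corresponding backward operator.
+struct GradOpRegistry {
+  std::unordered_map<std::string, std::function<OperatorBase*()>> creators;
+  static GradOpRegistry& Instance() {
+    static GradOpRegistry registry;
+    return registry;
+  }
+};
+```
+
+Under this picture, the `REGISTER_GRADIENT_OP` call shown below simply records one such correspondence.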
+ +For example, we have got a `add_two_op`, and is registered by the following code: + +```cpp +REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker); +``` + +`add_two` is the operator's type. `AddTwoOp` and `AddTwoOpMaker` are the operator class and the operator maker class respectively. + +Assume that we have also got the backward operator of `add_two_op`, which calculating the gradients of `add_two_op`'s inputs. Then we register it by the following way: + +```cpp +REGISTER_GRADIENT_OP(add_two, add_two_grad, AddTwoGradOp); +``` + +`add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. ### Implement : gradient operator registry From 84627bb934ed6b4c7213eeebc0fe59e5fbe7a84b Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 7 Aug 2017 14:03:13 +0800 Subject: [PATCH 004/170] add config helper for sequence slice layer. --- doc/api/v2/config/layer.rst | 5 ++ python/paddle/trainer/config_parser.py | 45 +++++++++++ .../paddle/trainer_config_helpers/layers.py | 68 ++++++++++++++++ .../tests/configs/file_list.sh | 3 +- .../protostr/test_seq_slice_layer.protostr | 79 +++++++++++++++++++ .../tests/configs/test_seq_slice_layer.py | 13 +++ 6 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 372272a53c..232ea6b49b 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -257,6 +257,11 @@ seq_concat .. autoclass:: paddle.v2.layer.seq_concat :noindex: +seq_slice +--------- +.. autoclass:: paddle.v2.layer.seq_slice + :noindex: + Reshaping Layers ================ diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9ea69fc5e5..11e54ba420 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2657,6 +2657,51 @@ class SubSequenceLayer(LayerBase): self.create_bias_parameter(bias, size) +@config_layer('seq_slice') +class SeqSliceLayer(LayerBase): + def __init__(self, name, inputs, starts, ends, bias=False, **xargs): + if isinstance(inputs, list): + assert len(inputs) == 1, ('the first input of sequence slice layer ' + 'is a single sequence input.') + else: + inputs = [inputs] + + if starts is not None: + if isinstance(starts, list): + assert len(starts) == 1, ( + 'the start indices for sequence slice layer cannot ' + 'be a list having more than one element.') + starts = starts[0] + inputs.append(starts) + + if ends is not None: + if isinstance(ends, list): + assert len(ends) == 1, ( + 'the end indices for sequence slice layer cannot ' + 'be a list having more than one element.') + ends = ends[0] + inputs.append(ends) + assert len(inputs) >= 2, ( + 'the sequence slice layer has at least two inputs.') + + super(SeqSliceLayer, self).__init__( + name, 'seq_slice', 0, inputs=inputs, **xargs) + input_layer0 = self.get_input_layer(0) + size = input_layer0.size + self.set_layer_size(size) + + if len(inputs) == 3: + assert ( + self.get_input_layer(1).size == self.get_input_layer(2).size), ( + 'If start and end indices are both given to' + 'sequence slice layer, they should have the same width.') + elif len(inputs) == 2: + if starts is not None: + self.config.select_first = True + else: + self.config.select_first = False + + @config_layer('out_prod') class OuterProdLayer(LayerBase): def __init__(self, name, 
inputs, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ea5fdcc50f..15636b1442 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -131,6 +131,7 @@ __all__ = [ 'crop_layer', 'clip_layer', 'slice_projection', + 'seq_slice_layer', ] @@ -225,6 +226,7 @@ class LayerType(object): PRELU = 'prelu' CROP_LAYER = 'crop' CLIP_LAYER = 'clip' + SEQ_SLICE = 'seq_slice' @staticmethod def is_layer_type(type_name): @@ -6119,3 +6121,69 @@ def clip_layer(input, min, max, name=None): max=max) return LayerOutput( name, LayerType.CLIP_LAYER, parents=[input], size=input.size) + + +@wrap_name_default() +def seq_slice_layer(input, starts, ends, name=None): + """ + seq_slice_layer will return one or several sub-sequences from the + input sequence layer given start and end indices. + + - If only start indices are given, and end indices are set to None, + this layer slices the input sequence from the given start indices + to its end. + - If only end indices are given, and start indices are set to None, + this layer slices the input sequence from its beginning to the + given end indices. + - If start and end indices are both given, they should have the same + number of elements. + + If start or end indices contains more than one elements, the input sequence + will be sliced for multiple times. + + + .. code-block:: python + + seq_silce = seq_slice_layer(input=input_seq, + starts=start_pos, ends=end_pos) + + :param name: name of this layer. + :type name: basestring + :param input: input for this layer, it should be a sequence. + :type input: LayerOutput + :param starts: start indices to slice the input sequence. + :type starts: LayerOutput|None + :param ends: end indices to slice the input sequence. + :type ends: LayerOutput|None + :return: LayerOutput object. 
+ :rtype: LayerOutput + + """ + + assert isinstance(input, LayerOutput), ( + 'The first input of seq_slice layer must be a PaddlePaddle layer.') + + if starts is not None: + assert isinstance(starts, LayerOutput), ( + 'The start indices for seq_slice layer ' + 'must be a PaddlePaddle layer.') + if ends is not None: + assert isinstance(ends, LayerOutput), ( + 'The end indices for seq_slice layer must be a PaddlePaddle layer.') + assert starts is not None or ends is not None, ( + 'start and end indices ' + 'cannot be set to None at the same time, at least one of ' + 'them should be given.') + if starts is not None and ends is not None: + assert starts.size == ends.size, ( + 'If start and end indices are both given to seq_slice_layer, ' + 'they should have the same width.') + + Layer( + name=name, + type=LayerType.SEQ_SLICE, + inputs=input.name, + starts=starts.name if starts is not None else None, + ends=ends.name if ends is not None else None) + return LayerOutput( + name, LayerType.SEQ_SLICE, parents=[input], size=input.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 0ffa58bc1e..1ce865ceac 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer -test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer) +test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer +test_seq_slice_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr new file mode 100644 index 0000000000..5b73d614fe --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr @@ -0,0 +1,79 @@ +type: "nn" +layers { + name: "word" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "starts" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "ends" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "starts" + } + inputs { + input_layer_name: "ends" + } +} +layers { + name: "__seq_slice_layer_1__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "starts" + } + select_first: true +} +layers { + name: "__seq_slice_layer_2__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "ends" + } + select_first: false +} +input_layer_names: "word" +output_layer_names: "__seq_slice_layer_0__" +output_layer_names: "__seq_slice_layer_1__" +output_layer_names: "__seq_slice_layer_2__" +sub_models { + name: "root" + layer_names: "word" + layer_names: "starts" + layer_names: "ends" + layer_names: "__seq_slice_layer_0__" + layer_names: "__seq_slice_layer_1__" + layer_names: "__seq_slice_layer_2__" + input_layer_names: "word" 
+ output_layer_names: "__seq_slice_layer_0__" + output_layer_names: "__seq_slice_layer_1__" + output_layer_names: "__seq_slice_layer_2__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py new file mode 100644 index 0000000000..510ad32208 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +input_seq = data_layer("word", size=128) +starts = data_layer("starts", size=5) +ends = data_layer("ends", size=5) + +seq_slice1 = seq_slice_layer(input=input_seq, starts=starts, ends=ends) +seq_slice2 = seq_slice_layer(input=input_seq, starts=starts, ends=None) +seq_slice3 = seq_slice_layer(input=input_seq, starts=None, ends=ends) + +outputs(seq_slice1, seq_slice2, seq_slice3) From 2988a58ef01a56e84cff02463972e0150bc6ab13 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 8 Aug 2017 08:52:05 +0800 Subject: [PATCH 005/170] add unittest. --- paddle/gserver/tests/CMakeLists.txt | 6 + .../gserver/tests/test_SeqSliceLayerGrad.cpp | 214 ++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 paddle/gserver/tests/test_SeqSliceLayerGrad.cpp diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 4546d12a90..9fdb148864 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -30,6 +30,12 @@ add_unittest_without_exec(test_CRFLayerGrad add_test(NAME test_CRFLayerGrad COMMAND test_CRFLayerGrad) +################ test_SeqSliceLayerGrad #################### +add_unittest_without_exec(test_SeqSliceLayerGrad + test_SeqSliceLayerGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_SeqSliceLayerGrad + COMMAND test_SeqSliceLayerGrad) add_unittest_without_exec(test_ActivationGrad test_ActivationGrad.cpp diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp new file mode 100644 index 0000000000..e456dd5db7 --- /dev/null +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -0,0 +1,214 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +const int MAX_SEQ_NUM = 5; +const int MAX_SEQ_LEN = 5; +const int MAX_BEAM_SIZE = 3; + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { + seqStartPos.resize(1, 0); + subSeqStartPos.resize(1, 0); + + // srand((size_t)(time(NULL))); + srand(1); + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int j = 0; j < subSeqNum; ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } +} + +/* + generate start indices according to sequence start positions. + */ +void genStarts(vector& seqStartPos, + vector>& starts, + size_t beamSize) { + starts.clear(); + starts.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + vector randStarts = + randSampling(seqLen, min(seqLen, static_cast(beamSize))); + copy(begin(randStarts), end(randStarts), begin(starts[i])); + } +} + +/* + generate end indices according to sequence start positions and start indices. + */ +void genEnds(vector& seqStartPos, + vector>& starts, + vector>& ends, + size_t beamSize) { + CHECK_EQ(seqStartPos.size() - 1, starts.size()); + ends.clear(); + ends.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); + + for (size_t i = 0; i < starts.size(); ++i) { + for (size_t j = 0; j < starts[i].size(); ++j) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + CHECK_GE(seqLen - 1, starts[i][j]); + if (starts[i][j] == -1.) break; + if (starts[i][j] == (seqLen - 1)) { + ends[i][j] = starts[i][j]; + } else { + ends[i][j] = starts[i][j] + randSampling(seqLen - starts[i][j], 1)[0]; + } + } + } +} + +void genTestData(vector& seqStartPos, + vector& subSeqStartPos, + vector>& starts, + vector>& ends, + bool hasSubseq) { + size_t beamSize = MAX_BEAM_SIZE; + genSeqInfo(seqStartPos, subSeqStartPos); + + genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize); + genEnds(hasSubseq ? subSeqStartPos : seqStartPos, starts, ends, beamSize); +} + +template +void flatten2dVector(vector>& inVec, vector& outVec) { + size_t totalSize{0}; + for (auto const& items : inVec) totalSize += items.size(); + outVec.reserve(totalSize); + + for (auto& items : inVec) + move(items.begin(), items.end(), back_inserter(outVec)); +} + +void testSeqSliceLayer(bool hasSubseq, + bool useGpu, + vector& seqStartPos, + vector& subSeqStartPos, + vector>& starts, + vector>& ends) { + // layer size is not crutial for this layer, + // so here use a small layer size in the unittest. + const size_t layerSize{4}; + TestConfig config; + config.layerConfig.set_type("seq_slice"); + config.layerConfig.set_size(layerSize); + + // add the first input + MatrixPtr seqInputPtr = + Matrix::create(hasSubseq ? 
subSeqStartPos.back() : seqStartPos.back(), + layerSize, + false, + false); + seqInputPtr->randomizeUniform(); + + if (hasSubseq) { + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "seq_input", + seqInputPtr, + seqStartPos, + subSeqStartPos}); + } else { + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "seq_input", seqInputPtr, seqStartPos}); + } + config.layerConfig.add_inputs(); + + // add start indices + if (starts.size()) { + vector startsToVec; + flatten2dVector(starts, startsToVec); + + MatrixPtr startMatrixPtr = + Matrix::create(starts.size(), starts[0].size(), false, false); + startMatrixPtr->copyFrom(startsToVec.data(), startsToVec.size()); + + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); + config.layerConfig.add_inputs(); + } + + // add end indices + if (ends.size()) { + vector endsToVec; + flatten2dVector(ends, endsToVec); + MatrixPtr endMatrixPtr = + Matrix::create(ends.size(), ends[0].size(), false, false); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); + config.layerConfig.add_inputs(); + } + + testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); +} + +TEST(Layer, SeqSliceLayer) { + vector seqStartPos; + vector subSeqStartPos; + vector> starts; + vector> ends; + + genSeqInfo(seqStartPos, subSeqStartPos); + for (bool hasSubseq : {false, true}) { + genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); + for (bool useGpu : {false, true}) { + vector> tmp; + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, tmp); + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, ends); + } + } +} + +int main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 7304006b7121c844d071227a6c2d24245a06e32e Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 8 Aug 2017 16:38:27 -0700 Subject: [PATCH 006/170] Update backward.md --- paddle/framework/backward.md | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 61f308b469..c717c2f30b 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -24,20 +24,31 @@ REGISTER_GRADIENT_OP(add_two, add_two_grad, AddTwoGradOp); `add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. -### Implement : gradient operator registry +## Backward Opeartor Creating -| | forward operator | backward operator | -| ---------------------- | ---------------- | -------------------------------- | -| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | -| **Operator::outputs_** | Outputs | InputGradients | +### Usage -Inputs/Outputs means the input/output of the operator, InputGradients/OutputGradients is the gradient respect to forward opeartor. Forward operator and Backward operator are isomorphic, save their corresponding needs into member attribute. +Given a certain forward operator, we can get its corresponding backward opeartor by calling: -We use a global hash map record the gradient operators available, follow the philosophy of minimum core, make operator pluggable unit. Each gradient is an operator and it needs to regist itself. 
+```cpp +OperatorBase* bwd_op = BuildGradOp(const OperatorBase* fwd_op); +``` + +The function `BuildGradOp` will sequentially execute following processes: + +1. Getting the `type_` of given forward operator, and then creating the corresponding backward operator. + +2. Copying all the attributes of forward operator expect `input_format` and `output_format`(if it has), for their elements differ between forward and backward operators. + +3. Copying forward operator's `inputs_` and `outputs_` to backward operator's `inputs_`. And adding forward inputs' gradient variables into backward `output_`, adding forward outputs' gradient variables into backward `input_`. + +4. Building backward operator's `input_format`, `output_format` (if necessary) and `in_out_idxs_` according to its `inputs_` and `outputs_` just created. + +## Backward Network Building -grad_op_builder(fengjiayi) +A backward network is a series of backward operators. The main idea of building a backward network is creating backward operators in the inverted sequence and put them together. -### Implement : Backward network +In our design, the network itself is also a kind of operator. So the operators contained by a big network may be some small network. given a forward network, it generates the backward network. We only care about the Gradients—`OutputGradients`,`InputGradients`. From 26bc5b12596c945956f7a6b003712805e579a36d Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 8 Aug 2017 18:48:11 +0800 Subject: [PATCH 007/170] add implementations. --- paddle/gserver/layers/KmaxSeqScoreLayer.cpp | 5 + paddle/gserver/layers/SequenceSliceLayer.cpp | 228 ++++++++++++++++++ .../gserver/layers/SubNestedSequenceLayer.cpp | 16 +- .../gserver/tests/test_SeqSliceLayerGrad.cpp | 25 +- paddle/parameter/Argument.cpp | 27 ++- 5 files changed, 278 insertions(+), 23 deletions(-) create mode 100644 paddle/gserver/layers/SequenceSliceLayer.cpp diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index 8ce591d476..e96fd61fc1 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -97,6 +97,11 @@ void KmaxSeqScoreLayer::forward(PassType passType) { scores_ = inputScore; } + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data may + // occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp new file mode 100644 index 0000000000..410aba663e --- /dev/null +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -0,0 +1,228 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +class SequenceSliceLayer : public Layer { +public: + explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in CpuSparseMatrix + // with SparseValueType set to NO_VALUE. + MatrixPtr startIdsOnCpu_; + MatrixPtr endIdsOnCpu_; + + std::vector selectedRows_; + IVectorPtr rowIndice_; + std::vector> inputSeqInfoVec_; + std::vector outSubSeqStartPos_; + std::vector outSeqStartPos_; + + void checkInputs(); + void copySliceIdsToCpu(); + void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends); +}; + +REGISTER_LAYER(seq_slice, SequenceSliceLayer); + +bool SequenceSliceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_GE(inputLayers_.size(), 2U); + CHECK_LE(inputLayers_.size(), 3U); + + setNeedSequenceInfo(false); + return true; +} + +void SequenceSliceLayer::checkInputs() { + const Argument& inputSeq = getInput(0); + CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " + << "must be a sequence."; + // Check inputs + const MatrixPtr indices1 = getInputValue(1); + CHECK_EQ(indices1->getHeight(), + inputSeq.hasSubseq() ? 
inputSeq.getNumSubSequences() + : inputSeq.getNumSequences()) + << "Height of the second input should be equal to number of sequence " + << "in the first input."; + if (inputLayers_.size() == 3) { + const MatrixPtr indices2 = getInputValue(2); + CHECK_EQ(indices2->getHeight(), indices1->getHeight()) + << "start indices and end indices should have the same height."; + CHECK_EQ(indices2->getWidth(), indices1->getWidth()) + << "start indices and end indices should have the same Width."; + } +} + +void SequenceSliceLayer::copySliceIdsToCpu() { + if (!useGpu_) { + if (inputLayers_.size() == 2U) { + if (config_.select_first()) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = nullptr; + } else { + startIdsOnCpu_ = nullptr; + endIdsOnCpu_ = getInputValue(1); + } + } else if (inputLayers_.size() == 3U) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = getInputValue(2); + } + return; + } + + const MatrixPtr indices1 = getInputValue(1); + if (inputLayers_.size() == 2U) { + if (config_.select_first()) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + endIdsOnCpu_ = nullptr; + } else { + Matrix::resizeOrCreate(endIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices1); + startIdsOnCpu_ = nullptr; + } + } else if (inputLayers_.size() == 3U) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + + const MatrixPtr indices2 = getInputValue(2); + Matrix::resizeOrCreate(endIdsOnCpu_, + indices2->getHeight(), + indices2->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices2); + } +} + +void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, + const MatrixPtr ends) { + outSeqStartPos_.resize(1, 0); + outSubSeqStartPos_.resize(1, 0); + selectedRows_.clear(); + + size_t beamSize = starts ? starts->getWidth() : ends->getWidth(); + // iterate over sequence + size_t rowIdx = 0; + for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { + // iterate over sub-sequence in a sequence + for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { + // iterate over each index for slicing. + for (size_t k = 0; k < beamSize; ++k) { + if (starts) { + if (starts->getElement(rowIdx, k) == -1.) break; + } else if (ends->getElement(rowIdx, k) == -1.) + break; + + int begPos = inputSeqInfoVec_[i][j]; + if (starts) begPos += starts->getElement(rowIdx, k); + + int endPos = inputSeqInfoVec_[i][j + 1] - 1; + if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); + + int seqLen = endPos - begPos + 1; + CHECK(seqLen); + for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); + inputSeqInfoVec_.size() > 1 + ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) + : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen); + } + rowIdx++; + } + if (inputSeqInfoVec_.size() > 1) + outSeqStartPos_.push_back(outSubSeqStartPos_.back()); + } + + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + + // create the sequence information for the output. 
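+  // The slice boundaries collected above in outSeqStartPos_ (and, for nested
+  // inputs, outSubSeqStartPos_) are copied into the output Argument below, so
+  // that every selected slice becomes one (sub-)sequence of the output.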
+ ICpuGpuVector::resizeOrCreate( + output_.sequenceStartPositions, outSeqStartPos_.size(), false); + output_.sequenceStartPositions->copyFrom( + outSeqStartPos_.data(), outSeqStartPos_.size(), false); + + if (inputSeqInfoVec_.size() > 1) { + ICpuGpuVector::resizeOrCreate( + output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false); + output_.subSequenceStartPositions->copyFrom( + outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false); + } +} + +void SequenceSliceLayer::forward(PassType passType) { + Layer::forward(passType); + checkInputs(); + + const Argument& inputSeq = getInput(0); + inputSeqInfoVec_.clear(); + Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions, + inputSeqInfoVec_); + copySliceIdsToCpu(); + + // calculate the selected row indices in a batch, + // and build the output sequence information. + calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr, + endIdsOnCpu_ ? endIdsOnCpu_ : nullptr); + + resetOutput(selectedRows_.size(), getSize()); + + getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); +} + +void SequenceSliceLayer::backward(const UpdateCallback& callback) { + MatrixPtr inputSeqGrad = getInputGrad(0); + MatrixPtr outputGrad = getOutputGrad(); + + outputGrad->addToRows(*inputSeqGrad, *rowIndice_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 76f587fff7..0db0300270 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -52,11 +52,10 @@ private: * ] * * ths output is saved to private member rowIndice_; - * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - * 16,17,18,19,20,21,22,23,24,25,26,27] + * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27] */ - void calSelectedCols(const MatrixPtr selectedIndices, + void calSelectedRows(const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo); // if the second input of this layer is on GPU memory, copy it to CPU memory. @@ -67,7 +66,7 @@ private: std::vector> inputSeqInfoVec_; // the final selected row indices in a batch, - // rowIdx_ and selectedRows_ actually share a same memory. + // rowIndice_ and selectedRows_ actually share a same memory. IVectorPtr rowIndice_; std::vector selectedRows_; }; @@ -83,7 +82,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap, return true; } -void SubNestedSequenceLayer::calSelectedCols( +void SubNestedSequenceLayer::calSelectedRows( const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo) { selectedRows_.clear(); @@ -96,6 +95,11 @@ void SubNestedSequenceLayer::calSelectedCols( for (size_t i = 0; i < seqNum; ++i) { for (size_t j = 0; j < beamSize; ++j) { if (selectedIndices->getElement(i, j) == -1.) break; + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. 
int selSubSeqIdx = selectedIndices->getElement(i, j); CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); @@ -160,7 +164,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, inputSeq.subSequenceStartPositions, inputSeqInfoVec_); - calSelectedCols(selIdsCpu_, inputSeqInfoVec_); + calSelectedRows(selIdsCpu_, inputSeqInfoVec_); resetOutput(selectedRows_.size(), getSize()); getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp index e456dd5db7..d560ca650b 100644 --- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -26,9 +26,9 @@ using namespace std; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -const int MAX_SEQ_NUM = 5; -const int MAX_SEQ_LEN = 5; -const int MAX_BEAM_SIZE = 3; +const int MAX_SEQ_NUM = 17; +const int MAX_SEQ_LEN = 23; +const int MAX_BEAM_SIZE = 13; vector randSampling(real range, int n) { CHECK_GE(range, n); @@ -46,8 +46,7 @@ void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { seqStartPos.resize(1, 0); subSeqStartPos.resize(1, 0); - // srand((size_t)(time(NULL))); - srand(1); + srand((size_t)(time(NULL))); int seqNum = 1 + (rand() % MAX_SEQ_NUM); for (int i = 0; i < seqNum; ++i) { int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); @@ -105,7 +104,7 @@ void genTestData(vector& seqStartPos, vector>& starts, vector>& ends, bool hasSubseq) { - size_t beamSize = MAX_BEAM_SIZE; + size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE); genSeqInfo(seqStartPos, subSeqStartPos); genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize); @@ -167,16 +166,21 @@ void testSeqSliceLayer(bool hasSubseq, config.inputDefs.push_back( {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(true); } // add end indices if (ends.size()) { vector endsToVec; flatten2dVector(ends, endsToVec); + MatrixPtr endMatrixPtr = Matrix::create(ends.size(), ends[0].size(), false, false); + endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size()); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(false); } testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); @@ -188,10 +192,15 @@ TEST(Layer, SeqSliceLayer) { vector> starts; vector> ends; + std::vector mode = {false}; +#ifndef PADDLE_ONLY_CPU + mode.push_back(true); +#endif genSeqInfo(seqStartPos, subSeqStartPos); - for (bool hasSubseq : {false, true}) { + for (bool hasSubseq : {true, false}) { + LOG(INFO) << "hasSubSeq : " << hasSubseq; genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); - for (bool useGpu : {false, true}) { + for (bool useGpu : mode) { vector> tmp; testSeqSliceLayer( hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 0547ac93cd..06f7e5245f 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -670,19 +670,28 @@ void Argument::reorganizeSeqInfo( const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos, std::vector>& reorganizedSeqInfo) { - int* seqStarts = seqStartPos->getMutableData(false); - int* subSeqStarts = subSeqStartPos->getMutableData(false); + CHECK(seqStartPos); int seqNum = seqStartPos->getSize() - 1; - 
reorganizedSeqInfo.resize(seqNum, std::vector()); - int seqIdx = 0; - for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { - reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); - if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { - seqIdx++; - if (seqIdx == seqNum) return; + int* seqStarts = seqStartPos->getMutableData(false); + + if (subSeqStartPos) { + int* subSeqStarts = subSeqStartPos->getMutableData(false); + reorganizedSeqInfo.resize(seqNum, std::vector()); + int seqIdx = 0; + for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { + seqIdx++; + if (seqIdx == seqNum) return; + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + } } + } else { + reorganizedSeqInfo.resize(1, std::vector(seqNum + 1, 0)); + memcpy(reorganizedSeqInfo[0].data(), + seqStarts, + sizeof(int) * seqStartPos->getSize()); } } From b97f020f9c34da04e093deb4691f6286f4017e62 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 10 Aug 2017 10:37:07 +0800 Subject: [PATCH 008/170] fix unittest error. --- paddle/gserver/layers/SequenceSliceLayer.cpp | 3 +-- python/paddle/trainer_config_helpers/layers.py | 1 + .../protostr/test_kmax_seq_socre_layer.protostr | 17 +++++------------ .../tests/configs/test_kmax_seq_socre_layer.py | 4 +--- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 424f898553..165ee6311a 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -70,9 +70,8 @@ void SequenceSliceLayer::checkInputs() { const Argument& inputSeq = getInput(0); CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " << "must be a sequence."; - // Check inputs const MatrixPtr indices1 = getInputValue(1); - CHECK_EQ(indices1->getHeight(), + CHECK_EQ(static_cast(indices1->getHeight()), inputSeq.hasSubseq() ? 
inputSeq.getNumSubSequences() : inputSeq.getNumSequences()) << "Height of the second input should be equal to number of sequence " diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index e51332da0d..79d24cfe5b 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6242,6 +6242,7 @@ def seq_slice_layer(input, starts, ends, name=None): name, LayerType.SEQ_SLICE, parents=[input], size=input.size) +@wrap_name_default() @layer_support() def kmax_sequence_score_layer(input, name=None, beam_size=1): """ diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr index 81bd71f68e..3d32220bfb 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr @@ -1,12 +1,6 @@ type: "nn" layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "data" + name: "input_seq" type: "data" size: 128 active_type: "" @@ -17,7 +11,7 @@ layers { size: 1 active_type: "exponential" inputs { - input_layer_name: "data" + input_layer_name: "input_seq" input_parameter_name: "___fc_layer_0__.w0" } bias_parameter_name: "___fc_layer_0__.wbias" @@ -51,15 +45,14 @@ parameters { initial_strategy: 0 initial_smart: false } -input_layer_names: "data" +input_layer_names: "input_seq" output_layer_names: "__kmax_sequence_score_layer_0__" sub_models { name: "root" - layer_names: "input" - layer_names: "data" + layer_names: "input_seq" layer_names: "__fc_layer_0__" layer_names: "__kmax_sequence_score_layer_0__" - input_layer_names: "data" + input_layer_names: "input_seq" output_layer_names: "__kmax_sequence_score_layer_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py index d245c5a41c..48d0cd55da 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py @@ -2,9 +2,7 @@ #coding=utf-8 from paddle.trainer_config_helpers import * -data = data_layer(name='input', size=300) - -data = data_layer(name="data", size=128) +data = data_layer(name="input_seq", size=128) scores = fc_layer(input=data, size=1, act=ExpActivation()) kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5) From cfb86c4e23d424328066fe8d2fbbacb9c9ead6c1 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:30:41 +0800 Subject: [PATCH 009/170] Add vol2col and col2vol cuda kernel --- paddle/cuda/include/hl_matrix.h | 58 ++++++++++ paddle/cuda/include/stub/hl_matrix_stub.h | 15 +++ paddle/cuda/src/hl_cuda_matrix.cu | 135 ++++++++++++++++++++++ 3 files changed, 208 insertions(+) diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index eb454c59c1..da2ed8cabb 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -224,4 +224,62 @@ extern void hl_matrix_collect_shared_bias(real* B_d, extern void hl_matrix_rotate( real* mat, real* matRot, int dimM, int dimN, bool clockWise); +/** + * @brief Matrix vol2Col: Convert 3D volume into col matrix + * + * @param[in] matSrc input matrix. 
+ * @param[in] channel channel of matSrc. + * @param[in] depth depth of matSrc. + * @param[in] height height of matSrc. + * @param[in] width width of matSrc. + * @param[in] filterD depth of filter. + * @param[in] filterH height of filter. + * @param[in] filterW width of filter. + * @param[in] strideD stride in the depth. + * @param[in] strideH stride in the height. + * @param[in] strideW stride in the width. + * @param[in] paddingD padding in the depth. + * @param[in] paddingH padding in the height. + * @param[in] paddingW padding in the width. + * @param[out] matDst output matrix. + * + */ +extern void hl_matrix_vol2Col(real* matSrc, + int channel, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* matDst); + +/** + * @brief Matrix col2Vol: Convert col matrix into 3D volume + * + * @param[out] matDst output matrix. + * @param[in] channel channel of matDst. + * @param[in] depth depth of matDst. + * @param[in] height height of matDst. + * @param[in] width width of matDst. + * @param[in] filterD depth of filter. + * @param[in] filterH height of filter. + * @param[in] filterW width of filter. + * @param[in] strideD stride in the depth. + * @param[in] strideH stride in the height. + * @param[in] strideW stride in the width. + * @param[in] paddingD padding in the depth. + * @param[in] paddingH padding in the height. + * @param[in] paddingW padding in the width. + * @param[in] matSrc input matrix. + * @param[in] beta input + * @param[in] alpha input + * + */ +extern void hl_matrix_col2Vol(real* matDst, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* matSrc, + real alpha, real beta); + + #endif /* HL_MATRIX_H_ */ diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h index 127cb7e279..0b73777812 100644 --- a/paddle/cuda/include/stub/hl_matrix_stub.h +++ b/paddle/cuda/include/stub/hl_matrix_stub.h @@ -99,4 +99,19 @@ inline void hl_matrix_collect_shared_bias(real* B_d, inline void hl_matrix_rotate( real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} +inline void hl_matrix_vol2Col(real* data, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* data_col) {} + +inline void hl_matrix_col2Vol(real* data, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* data_Im, + real alpha, real beta) {} + #endif // HL_MATRIX_STUB_H_ diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index 39272456c3..f626c07a0c 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -592,3 +592,138 @@ void hl_matrix_rotate( mat, matRot, dimM, dimN, clockWise); CHECK_SYNC("hl_matrix_rotate failed"); } + + +__global__ void keMatrixVol2Col( + int num_kernels, real*dataSrc, real* dataDst, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + int depth_col, int height_col, int width_col){ + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; + index < num_kernels; + index += blockDim.x * 
gridDim.x){ + + int w_out = index % width_col; + int h_out = (index / width_col ) % height_col; + int d_out = (index / width_col / height_col) % depth_col; + int channel_in = index / width_col / height_col / depth_col; + int channel_out = channel_in * filterD * filterH * filterW; + int w_in = w_out * strideW - paddingW; + int h_in = h_out * strideH - paddingH; + int d_in = d_out * strideD - paddingD; + + dataDst += ((channel_out * depth_col + d_out) * height_col + h_out) * width_col + w_out; + dataSrc += ((channel_in * depth + d_in) * height + h_in) * width + w_in; + for (int k = 0; k < filterD; ++k) { + for (int i = 0; i < filterH; ++i) { + for (int j = 0; j < filterW; ++j) { + int d = d_in + k; + int h = h_in + i; + int w = w_in + j; + *dataDst = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && w < width ) ? + dataSrc[(k * height + i) * width + j] : 0; + dataDst += depth_col * height_col * width_col; + } + } + } + } +} + +void hl_matrix_vol2Col(real* dataSrc, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, real* dataDst){ + + int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; + int height_col = (height + 2 * paddingH - filterH) / strideH + 1; + int width_col = (width + 2 * paddingW - filterW) / strideW + 1; + int num_kernels = channels * depth_col * height_col * width_col; + + const int threads = 512; + const int blocks = DIVUP(num_kernels, threads); + + keMatrixVol2Col<<< blocks, threads >>>( + num_kernels, dataSrc, dataDst, + depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, + depth_col, height_col, width_col); + CHECK_SYNC("hl_matrix_vol2Col failed"); +} + +__global__ void keMatrixCol2Vol( + int num_kernels, real*dataDst, real* dataSrc, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + int depth_col, int height_col, int width_col, + real alpha, real beta){ + + for (int index = blockIdx.x * blockDim.x + threadIdx.x; + index < num_kernels; + index += blockDim.x * gridDim.x) { + + real val = 0; + int w = index % width + paddingW; + int h = (index / width) % height + paddingH; + int d = (index / width / height) % depth + paddingD; + int c = index / (width * height * depth); + // compute the start and end of the output + int w_col_start = (w < filterW) ? 0 : (w - filterW) / strideW + 1; + int w_col_end = min(w / strideW + 1, width_col); + int h_col_start = (h < filterH) ? 0 : (h - filterH) / strideH + 1; + int h_col_end = min(h / strideH + 1, height_col); + int d_col_start = (d < filterD) ? 
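+    // [dhw]_col_{start,end} bound the output (column) positions whose filter
+    // window covers the input voxel (d, h, w); the loops below accumulate
+    // their contributions into this voxel.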
0 : (d - filterD) / strideD + 1; + int d_col_end = min(d / strideD + 1, depth_col); + + int offset = (c * filterD * filterW * filterH + \ + d * filterW * filterH + h * filterW + w) * depth_col * height_col * width_col; + + int coeff_d_col = (1 - strideD * filterW * filterH * depth_col) * height_col * width_col; + int coeff_h_col = (1 - strideH * filterW * depth_col * height_col) * width_col; + int coeff_w_col = (1 - strideW * depth_col * height_col * width_col); + + for (int d_col = d_col_start; d_col < d_col_end; ++d_col) { + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + val += dataSrc[offset + d_col * coeff_d_col + h_col * coeff_h_col + w_col * coeff_w_col]; + } + } + } + dataDst[index] = val; + } +} + +void hl_matrix_col2Vol(real* dataDst, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real* dataSrc, + real alpha, real beta){ + + int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; + int height_col = (height + 2 * paddingH - filterH) / strideH + 1; + int width_col = (width + 2 * paddingW - filterW) / strideW + 1; + int num_kernels = channels * depth * height * width; + + const int threads = 512; + const int blocks = DIVUP(num_kernels, threads); + + keMatrixCol2Vol<<< blocks, threads >>>( + num_kernels, dataDst, dataSrc, + depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, + depth_col, height_col, width_col, + alpha, beta); + + CHECK_SYNC("hl_matrix_col2Vol failed"); +} From 8cc0eb9c5d564b71452e65d1bac3f9f19f5bf89e Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:38:02 +0800 Subject: [PATCH 010/170] Modify ConvConfig, Add depth dimension --- proto/ModelConfig.proto | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..043ae502b0 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,6 +82,12 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; + + optional uint32 filter_size_z = 15 [ default = 1 ]; + optional uint32 padding_z = 16 [ default = 1 ]; + optional uint32 stride_z = 17 [ default = 1 ]; + optional uint32 output_z = 18 [ default = 1 ]; + optional uint32 img_size_z = 19 [ default = 1 ]; } message PoolConfig { @@ -631,4 +637,4 @@ message ModelConfig { // For External Machine, defining how to split a neural network // into multiple parts. 
optional ExternalConfig external_config = 9; -}; +}; \ No newline at end of file From 5d7f6dde52af781e15953c041374b5671bdf918d Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:42:48 +0800 Subject: [PATCH 011/170] Add depth dimension information to ConvBaseLayer --- paddle/gserver/layers/ConvBaseLayer.cpp | 17 +++++++++++++---- paddle/gserver/layers/ConvBaseLayer.h | 8 ++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index e161d89c38..e437b0b86e 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -21,9 +21,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv") - ? false - : true; + isDeconv_ = (config_.type() == "exconv" || + config_.type() == "cudnn_conv" || + config_.type() == "conv3d" || + config_.type() == "deconv3d" ) + ? false : true; /* Initialize the convolutional layer parameter */ numFilters_ = config_.num_filters(); @@ -36,7 +38,6 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, paddingY_.push_back(conf.padding_y()); strideY_.push_back(conf.stride_y()); filterSizeY_.push_back(conf.filter_size_y()); - filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); channels_.push_back(conf.channels()); imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y() : conf.img_size()); @@ -45,6 +46,14 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, filterChannels_.push_back(conf.filter_channels()); outputH_.push_back(conf.has_output_y() ? conf.output_y() : conf.output_x()); outputW_.push_back(conf.output_x()); + + paddingZ_.push_back(conf.padding_z()); + strideZ_.push_back(conf.stride_z()); + filterSizeZ_.push_back(conf.filter_size_z()); + imgSizeD_.push_back(conf.img_size_z()); + outputD_.push_back(conf.output_z()); + filterPixels_.push_back( + filterSize_.back() * filterSizeY_.back() * filterSizeZ_.back()); } CHECK(inputLayers_.size() == parameters_.size()); diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index e9d15d94f8..8d1fd989e8 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -23,6 +23,7 @@ namespace paddle { * with learned filters and (optionally) adds biases. */ + class ConvBaseLayer : public Layer { protected: typedef std::vector IntV; @@ -58,6 +59,13 @@ protected: IntV outputH_; /// The spatial dimensions of output feature map width. 
IntV outputW_; + + IntV outputD_; + IntV imgSizeD_; + IntV filterSizeZ_; + IntV strideZ_; + IntV paddingZ_; + /// Group size, refer to grouped convolution in /// Alex Krizhevsky's paper: when group=2, the first half of the /// filters are only connected to the first half of the input channels, From 11975b4f9185907b5f2518722e5311d744361887 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:47:37 +0800 Subject: [PATCH 012/170] Add Conv3DLayer --- paddle/gserver/layers/Conv3DLayer.cpp | 225 ++++++++++++++++++++++++++ paddle/gserver/layers/Conv3DLayer.h | 57 +++++++ 2 files changed, 282 insertions(+) create mode 100644 paddle/gserver/layers/Conv3DLayer.cpp create mode 100644 paddle/gserver/layers/Conv3DLayer.h diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp new file mode 100644 index 0000000000..0fa9c5f9f5 --- /dev/null +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -0,0 +1,225 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" +#include "Conv3DLayer.h" + +namespace paddle { + +REGISTER_LAYER(conv3d, Conv3DLayer); + +bool Conv3DLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { + if (!ConvBaseLayer::init(layerMap, parameterMap)) + return false; + int index = 0; + for (auto &inputConfig : config_.inputs()) { + const ConvConfig &conf = inputConfig.conv_conf(); + M_.push_back(numFilters_ / conf.groups()); + K_.push_back( + conf.filter_channels() * conf.filter_size_z() * \ + conf.filter_size_y() * conf.filter_size()); + weights_[index]->getW()->reshape( + weights_[index]->getW()->getWidth(), + weights_[index]->getW()->getHeight()); + weights_[index]->getWGrad()->reshape( + weights_[index]->getWGrad()->getWidth(), + weights_[index]->getWGrad()->getHeight()); + ++index; + } + biases_->getWGrad()->reshape( + biases_->getWGrad()->width_, biases_->getWGrad()->height_); + biases_->getW()->reshape( + biases_->getW()->width_, biases_->getW()->height_); + CHECK(inputLayers_.size() == parameters_.size()); + return true; +} + + +size_t Conv3DLayer::getSize() { + CHECK_NE(inputLayers_.size(), 0UL); + // imgSizeH_.clear(); + // imgSizeW_.clear(); + // imgSizeD_.clear(); + outputH_.clear(); + outputW_.clear(); + outputD_.clear(); + N_.clear(); + size_t layerSize = 0; + for (size_t i = 0; i < inputLayers_.size(); ++i) { + // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); + outputW_.push_back(outputSize( + imgSizeW_[i], filterSize_[i], + padding_[i], stride_[i], true)); + outputH_.push_back(outputSize( + imgSizeH_[i], filterSizeY_[i], + paddingY_[i], strideY_[i], true)); + outputD_.push_back(outputSize( + imgSizeD_[i], filterSizeZ_[i], + paddingZ_[i], strideZ_[i], true)); + + N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + CHECK(layerSize == 0 || N_[i] * 
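+    // N_[i] = outputD * outputH * outputW, with each extent given by the
+    // usual (in + 2 * pad - filter) / stride + 1 rule; every input branch
+    // must therefore produce the same layer size of N_[i] * numFilters_.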
size_t(numFilters_) == layerSize); + layerSize += N_[i] * numFilters_; + } + getOutput().setFrameHeight(outputH_[0]); + getOutput().setFrameWidth(outputW_[0]); + getOutput().setFrameDepth(outputD_[0]); + return layerSize; +} + +void Conv3DLayer::forward(PassType passType) { + Layer::forward(passType); + + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int outWidth = getSize(); + resetOutput(batchSize, outWidth); + const MatrixPtr outMat = getOutputValue(); + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); + const MatrixPtr& inMat = getInputValue(i); + int width = inMat->getWidth(); + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wMat = weights_[i]->getW(); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], + imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i]); + + real *outData = outMat->getData() + n * outWidth; + MatrixPtr outMatSub = + Matrix::create(outData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr in = colBuf_->subMatrix(g * K, K); + MatrixPtr out = outMatSub->subMatrix(g * M, M); + out->mul(*wMatSub, *in, 1.0, 0.0); + } + } + } + if (nullptr != this->biasParameter_) { + REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); + this->addBias(); + } + forwardActivation(); +} + +void Conv3DLayer::backward(const UpdateCallback &callback) { + backwardActivation(); + + if (biases_ && biases_->getWGrad()) { + bpropBiases(); + biases_->getParameterPtr()->incUpdate(callback); + } + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); + if (weights_[i]->getWGrad()) { + bpropWeights(i); + } + if (this->needGradient_) { + bpropData(i); + } + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); + } +} + +void Conv3DLayer::bpropWeights(int i) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + const MatrixPtr& inMat = getInputValue(i); + int width = inMat->getWidth(); + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wGradMat = weights_[i]->getWGrad(); + real* outGradData = getOutputGrad()->getData(); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], + imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i]); + outGradData += n * getOutputGrad()->getWidth(); + MatrixPtr outGradSub = + Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); + wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); + } + } +} + +void Conv3DLayer::bpropData(int i) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wMat = weights_[i]->getW(); + real* outGradData = getOutputGrad()->getData(); + real* preGradData = 
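+  // Data gradient, per group: colGrad = W^T * outGrad
+  // ([K x N] = [K x M] * [M x N]); col2Vol then accumulates the column
+  // gradients back into the input gradient volume.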
getInputGrad(i)->getData(); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + for (int n = 0; n < batchSize; ++n) { + outGradData += n * getOutputGrad()->getWidth(); + preGradData += n * getInputGrad(i)->getWidth(); + MatrixPtr outGradSub = + Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); + inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); + } + colBuf_->col2Vol(preGradData, channels_[i], + imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i], + 1.0, 1.0); + } +} + +void Conv3DLayer::bpropBiases() { + MatrixPtr outGradMat = getOutputGrad(); + if (this->sharedBiases_) { + biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); + } else { + biases_->getWGrad()->collectBias(*outGradMat, 1.0f); + } +} + +void Conv3DLayer::addBias() { + MatrixPtr outMat = getOutputValue(); + + if (this->sharedBiases_) { + outMat->addSharedBias(*(biases_->getW()), 1.0f); + } else { + outMat->addBias(*(biases_->getW()), 1.0f); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/Conv3DLayer.h b/paddle/gserver/layers/Conv3DLayer.h new file mode 100644 index 0000000000..703671e5d0 --- /dev/null +++ b/paddle/gserver/layers/Conv3DLayer.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "ConvBaseLayer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" +#include + +namespace paddle { + +/** + * @brief A subclass of convolution layer. + * This layer expands input and use matrix multiplication to + * calculate convolution operation. + */ +class Conv3DLayer : public ConvBaseLayer { +public: + explicit Conv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + + ~Conv3DLayer() {} + + bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); + + size_t getSize(); + + void forward(PassType passType); + void addBias(); + + void backward(const UpdateCallback& callback); + + void bpropBiases(); + void bpropData(int i); + void bpropWeights(int i); + +protected: + // Figure out the dimensions for individual gemms. 
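+  // As computed in init()/getSize(): M_ = numFilters_ / groups_,
+  // K_ = filterChannels_ * filterSizeZ_ * filterSizeY_ * filterSize_,
+  // N_ = outputD_ * outputH_ * outputW_; the per-group forward GEMM is
+  // [M_ x K_] * [K_ x N_] = [M_ x N_].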
+ IntV M_; /// numFilters_ / filter_group_; + IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ + IntV K_; /// outputD_ * outputH_ * outputW_ + MatrixPtr colBuf_; +}; + +} // namespace paddle From 23cf0c61e066f54b360efc4e17576a056868b050 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:48:59 +0800 Subject: [PATCH 013/170] Add DeConv3DLayer --- paddle/gserver/layers/DeConv3DLayer.cpp | 211 ++++++++++++++++++++++++ paddle/gserver/layers/DeConv3DLayer.h | 58 +++++++ 2 files changed, 269 insertions(+) create mode 100644 paddle/gserver/layers/DeConv3DLayer.cpp create mode 100644 paddle/gserver/layers/DeConv3DLayer.h diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp new file mode 100644 index 0000000000..8de40b681d --- /dev/null +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -0,0 +1,211 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" +#include "DeConv3DLayer.h" + +namespace paddle { + +REGISTER_LAYER(deconv3d, DeConv3DLayer); + +#define DECONV_OUTPUT_SIZE(IN_SIZE, STRID, PAD, KSIZE) \ + (((IN_SIZE) - 1) * (STRID) - 2 * (PAD) + (KSIZE)) + +bool DeConv3DLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { + if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; + // for Deconv, the dimension of Kernel is + // channel * output * depth * height * weigth + // Matrix storage format: (output * depth * height * weigth) x channel + for (int index = 0; index < config_.inputs().size(); ++index) { + M_.push_back(filterChannels_[index]); + K_.push_back( + filterPixels_[index] * (numFilters_/groups_[index])); + weights_[index]->getW()->reshape( + filterPixels_[index] * numFilters_, + filterChannels_[index]); + weights_[index]->getWGrad()->reshape( + filterPixels_[index] * numFilters_, + filterChannels_[index]); + } + biases_->getWGrad()->reshape( + biases_->getWGrad()->width_, biases_->getWGrad()->height_); + biases_->getW()->reshape( + biases_->getW()->width_, biases_->getW()->height_); + CHECK(inputLayers_.size() == parameters_.size()); + return true; +} + + +size_t DeConv3DLayer::getSize() { + CHECK_NE(inputLayers_.size(), 0UL); + // imgSizeH_.clear(); + // imgSizeW_.clear(); + // imgSizeD_.clear(); + outputH_.clear(); + outputW_.clear(); + outputD_.clear(); + N_.clear(); + No_.clear(); + size_t layerSize = 0; + for (size_t i = 0; i < inputLayers_.size(); ++i) { + // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); + outputW_.push_back( + DECONV_OUTPUT_SIZE( + imgSizeW_[i], stride_[i], + padding_[i], filterSize_[i])); + outputH_.push_back( + DECONV_OUTPUT_SIZE( + imgSizeH_[i], strideY_[i], + paddingY_[i], filterSizeY_[i])); + outputD_.push_back( + DECONV_OUTPUT_SIZE( + imgSizeD_[i], strideZ_[i], + paddingZ_[i], filterSizeZ_[i])); + 
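+    // DECONV_OUTPUT_SIZE expands each axis to (in - 1) * stride - 2 * pad +
+    // kernel, the inverse of the convolution output formula; No_ counts the
+    // output voxels per filter and N_ the input voxels per channel.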
No_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); + CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); + layerSize += No_[i] * numFilters_; + } + getOutput().setFrameHeight(outputH_[0]); + getOutput().setFrameWidth(outputW_[0]); + getOutput().setFrameDepth(outputD_[0]); + return layerSize; +} + +void DeConv3DLayer::forward(PassType passType) { + Layer::forward(passType); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int outWidth = getSize(); + resetOutput(batchSize, outWidth); + const MatrixPtr outMat = getOutputValue(); + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); + const MatrixPtr& inMat = getInputValue(i); + int width = inMat->getWidth(); + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + MatrixPtr wMat = weights_[i]->getW(); + Matrix::resizeOrCreate(colBuf_, K * groups_[i] , N, false, useGpu_); + + for (int n = 0; n < batchSize; ++n) { + real *inData = inMat->getData() + n * width; + real *colBufData = colBuf_->getData(); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr wMatSub = wMat->subMatrix(g * K, K); + MatrixPtr inMatSub = + Matrix::create(inData, M, N, false, useGpu_); + MatrixPtr colBufDataSub = + Matrix::create(colBufData, K, N, false, useGpu_); + colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); + colBufData += K * N; + inData += M * N; + } + colBuf_->col2Vol(outMat->getData()+ n * outMat->getWidth(), + numFilters_, outputD_[i], outputH_[i], outputW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i], 1.0, 1.0); + } + } + if (nullptr != this->biasParameter_) { + REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); + this->addBias(); + } + forwardActivation(); +} + +void DeConv3DLayer::backward(const UpdateCallback &callback) { + backwardActivation(); + int batchSize = getOutputGrad()->getHeight(); + int outputWidth = getOutputGrad()->getWidth(); + if (biases_ && biases_->getWGrad()) { + bpropBiases(); + biases_->getParameterPtr()->incUpdate(callback); + } + for (size_t i =0; i < inputLayers_.size(); ++i) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + const MatrixPtr& inMat = getInputValue(i); + for (int n = 0; n < batchSize; ++n) { + REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); + if (weights_[i]->getWGrad() || this->needGradient_) { + colBuf_->vol2Col(getOutputGrad()->getData() + n * outputWidth, + numFilters_, outputD_[i], outputH_[i], outputW_[i], + filterSizeZ_[i], filterSizeY_[i], filterSize_[i], + strideZ_[i], strideY_[i], stride_[i], + paddingZ_[i], paddingY_[i], padding_[i]); + } + if (weights_[i]->getWGrad()) { + real *inData = inMat->getData() + n * inMat->getWidth();; + real *wGradData = weights_[i]->getWGrad()->getData(); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + MatrixPtr inMatSub = Matrix::create( + inData, M, N, false, useGpu_); + MatrixPtr wGradMatSub = Matrix::create( + wGradData, K, M, false, useGpu_); + wGradMatSub->mul(*colBufDataSub, + *(inMatSub->getTranspose()), 1.0, 1.0); + wGradData += K * M; + inData += M * N; + } + weights_[i]->getParameterPtr()->incUpdate(callback); + } + if (this->needGradient_) { + real* preGrad = getInputGrad(i)->getData(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); + 
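+          // Input gradient, per group: inGrad = W^T * colGrad, where colGrad
+          // is the vol2Col expansion of the output gradient computed above
+          // ([M x N] = [M x K] * [K x N]).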
MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); + MatrixPtr inGradMatSub = Matrix::create( + preGrad, M, N, false, useGpu_); + inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 0.0); + preGrad += M * N; + } + } + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + } + } +} + +void DeConv3DLayer::bpropWeights(int i) { } +void DeConv3DLayer::bpropData(int i) { } + +void DeConv3DLayer::bpropBiases() { + MatrixPtr outGradMat = getOutputGrad(); + + if (this->sharedBiases_) { + biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); + } else { + biases_->getWGrad()->collectBias(*outGradMat, 1.0f); + } +} + +void DeConv3DLayer::addBias() { + MatrixPtr outMat = getOutputValue(); + if (this->sharedBiases_) { + outMat->addSharedBias(*(biases_->getW()), 1.0f); + } else { + outMat->addBias(*(biases_->getW()), 1.0f); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/DeConv3DLayer.h b/paddle/gserver/layers/DeConv3DLayer.h new file mode 100644 index 0000000000..435807fe5d --- /dev/null +++ b/paddle/gserver/layers/DeConv3DLayer.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "ConvBaseLayer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" +#include + +namespace paddle { + +/** + * @brief A subclass of deconvolution3D layer. + * This layer expands input and use matrix multiplication to + * calculate deconvolution3D operation. + */ +class DeConv3DLayer : public ConvBaseLayer { +public: + explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + + ~DeConv3DLayer() {} + + bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); + + size_t getSize(); + + void forward(PassType passType); + void addBias(); + + void backward(const UpdateCallback& callback); + + void bpropBiases(); + void bpropData(int i); + void bpropWeights(int i); + +protected: + // Figure out the dimensions for individual gemms. 
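+  // As computed in init()/getSize(): M_ = filterChannels_,
+  // K_ = filterPixels_ * (numFilters_ / groups_), N_ = imgSizeD_ * imgSizeH_
+  // * imgSizeW_ (input voxels), No_ = outputD_ * outputH_ * outputW_.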
+ IntV M_; /// numFilters_ / filter_group_; + IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ + IntV K_; /// outputD_ * outputH_ * outputW_ + IntV No_; + MatrixPtr colBuf_; +}; + +} // namespace paddle From 52ceeedba5ca1371302414a0ad11ff93d9ed7d9a Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:51:39 +0800 Subject: [PATCH 014/170] Add col2vol and vol2col CPU funtion --- paddle/math/Matrix.cpp | 135 +++++++++++++++++++++++++++++++++++++++++ paddle/math/Matrix.h | 64 +++++++++++++++++++ 2 files changed, 199 insertions(+) diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..66868e73b3 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1389,6 +1389,52 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { output_d, grad_d, mat_d, height_, width_); } +void GpuMatrix::vol2Col(real* data, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + hl_matrix_vol2Col(data, + channels, depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, getData()); +} + +void GpuMatrix::col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + hl_matrix_col2Vol(trg, + channels, depth, height, width, + filterD, filterH, filterW, + strideD, strideH, strideW, + paddingD, paddingH, paddingW, + getData(), + alpha, beta); + } + /** * CpuMatrix */ @@ -3975,6 +4021,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out, } } +void CpuMatrix::vol2Col(real* data, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + real* outData = getData(); + int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; + int outWidth = (width + 2 * paddingW - filterW) / strideW + 1; + int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1; + + int channelsCol = channels * filterD * filterH * filterW; + for (int c = 0; c < channelsCol; ++c) { + int wOffset = c % filterW; + int hOffset = (c / filterW) % filterH; + int dOffset = (c / filterW / filterH) % filterD; + int cIn = c / filterW / filterH / filterD; + for (int d = 0; d < outDepth; ++d) { + for (int h = 0; h < outHeight; ++h) { + for (int w = 0; w < outWidth; ++w) { + int dPad = d * strideD - paddingD + dOffset; + int hPad = h * strideH - paddingH + hOffset; + int wPad = w * strideW - paddingW + wOffset; + + if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width && + dPad >= 0 && dPad < depth) + outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = + data[((cIn * depth + dPad) * height + hPad) * width + wPad]; + else + outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = 0; + } + } + } + } +} + +void CpuMatrix::col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + real* src = getData(); + int outDepth = (depth + 2 * paddingH - filterD) / strideD + 1; + int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; + int outWidth = 
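+  // This col matrix has channels * filterD * filterH * filterW rows and
+  // outDepth * outHeight * outWidth columns, mirroring vol2Col above; each
+  // covered voxel receives alpha * src + beta * trg.
+  // (outDepth above is computed with paddingH; the depth padding is paddingD.)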
(width + 2 * paddingW - filterW) / strideW + 1; + int channelsCol = channels * filterD * filterH * filterW; + for (int c = 0; c < channelsCol; ++c) { + int wOffset = c % filterW; + int hOffset = (c / filterW) % filterH; + int dOffset = (c / filterW / filterH) % filterD; + int cIm = c / filterW / filterH / filterD; + for (int d = 0; d < outDepth; ++d) { + for (int h = 0; h < outHeight; ++h) { + for (int w = 0; w < outWidth; ++w) { + int dPad = d * strideD - paddingD + dOffset; + int hPad = h * strideH - paddingH + hOffset; + int wPad = w * strideW - paddingW + wOffset; + if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width && + dPad >= 0 && dPad < depth) + trg[((cIm * depth + dPad) * height + hPad) * width + wPad] = + alpha * + src[((c * outDepth + d) * outHeight + h) * outWidth + w] + + beta * + trg[((cIm * depth + dPad) * height + hPad) * width + wPad]; + } + } + } + } +} + //////////////////////////////////////////////////////////////// // functions executed via cpu // //////////////////////////////////////////////////////////////// diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index bb802bbb2c..4354996ce0 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -1039,6 +1039,42 @@ public: LOG(FATAL) << "Not implemented"; } + virtual void vol2Col(real* data, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + LOG(FATAL) << "Not implemeted"; + } + virtual void bilinearForward(const Matrix& in, const size_t inImgH, const size_t inImgW, @@ -1374,6 +1410,20 @@ public: const real ratioH, const real ratioW); + void vol2Col(real* data, + int channels, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW); + + void col2Vol(real* trg, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real alpha, real beta); + void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); @@ -1715,6 +1765,20 @@ public: const real ratioH, const real ratioW); + void vol2Col(real* data, + int channels, + int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW); + + void col2Vol(real* trg, + int channels, int depth, int height, int width, + int filterD, int filterH, int filterW, + int strideD, int strideH, int strideW, + int paddingD, int paddingH, int paddingW, + real alpha, real beta); + template void operator=(const ExpressionType& expr) { TensorCpuApply(*this, expr); From 9b3d6acdbfc2fd6bc26185ddb9c38dfb90632324 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sun, 13 Aug 2017 09:54:10 +0800 Subject: [PATCH 015/170] Add depth dimension information to Argument --- paddle/parameter/Argument.cpp | 2 ++ paddle/parameter/Argument.h | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/paddle/parameter/Argument.cpp 
b/paddle/parameter/Argument.cpp index 0547ac93cd..77fd0c5890 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -186,6 +186,7 @@ void Argument::resizeAndCopyFrom(const Argument& src, resizeAndCopy(strs, src.strs, useGpu, stream); frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; } int32_t Argument::resizeAndCopyFrom(const Argument& src, @@ -206,6 +207,7 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, dataId = src.dataId; frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; if (!src.sequenceStartPositions) { // non-sequence input, copy samples directly diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index d8d7a4398f..ba3ad2fd4d 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -35,6 +32,7 @@ struct Argument { strs(nullptr), frameHeight(0), frameWidth(0), + frameDepth(0), sequenceStartPositions(nullptr), subSequenceStartPositions(nullptr), cpuSequenceDims(nullptr), @@ -64,6 +62,7 @@ struct Argument { allCount = argument.allCount; frameHeight = argument.frameHeight; frameWidth = argument.frameWidth; + frameDepth = argument.frameDepth; dataId = argument.dataId; } @@ -76,6 +75,7 @@ struct Argument { // A dataBatch includes batchSize frames, one frame maybe not only vector size_t frameHeight; size_t frameWidth; + size_t frameDepth; // If NULL, each position is treated independently. // Otherwise, its size should be #NumberOfSequences + 1. @@ -136,8 +136,10 @@ struct Argument { } size_t getFrameHeight() const { return frameHeight; } size_t getFrameWidth() const { return frameWidth; } + size_t getFrameDepth() const { return frameDepth; } void setFrameHeight(size_t h) { frameHeight = h; } void setFrameWidth(size_t w) { frameWidth = w; } + void setFrameDepth(size_t d) { frameDepth = d; } int64_t getNumSequences() const { return sequenceStartPositions ? sequenceStartPositions->getSize() - 1 From 44ae44da49f206af56d02816aff8e9b2920d0bf8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 09:01:22 +0800 Subject: [PATCH 016/170] add configuratioin helpers. 
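
The cross_entropy_over_beam helper takes its inputs as (score, selected-index)
pairs, one pair per beam expansion, together with one label layer per
expansion; each score input must have size 1. A usage sketch, mirroring the
test config added in this patch (layer names are illustrative):

    cost = cross_entropy_over_beam(
        input=[sentence_scores, topk_sentence_ids,
               start_pos_scores, topk_start_pos_ids,
               end_pos_scores, topk_end_pos_ids],
        label=[sentence_idx, start_idx, end_idx])
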
--- python/paddle/trainer/config_parser.py | 16 ++ .../paddle/trainer_config_helpers/layers.py | 34 ++- .../tests/configs/file_list.sh | 2 +- .../test_cross_entropy_over_beam.protostr | 208 ++++++++++++++++++ .../configs/test_cross_entropy_over_beam.py | 39 ++++ 5 files changed, 295 insertions(+), 4 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da99e5bd53..a24299787b 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1602,6 +1602,21 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha +@config_layer('cross_entropy_over_beam') +class CrossEntropyOverBeamLayer(LayerBase): + def __init__(self, name, inputs, **xargs): + config_assert(len(inputs) % 3 == 0, "Error input numbers.") + super(CrossEntropyOverBeamLayer, self).__init__( + name, 'cross_entropy_over_beam', 0, inputs, **xargs) + input_num = len(inputs) / 3 + for i in range(input_num): + input_layer = self.get_input_layer(i * 2) + config_assert( + input_layer.size == 1, "Inputs for this layer are made up of " + "several pairs and the first one in a pair is scores for " + "all the candidates, so its size should be equal to 1.") + + @config_layer('fc') class FCLayer(LayerBase): layer_type = 'fc' @@ -2249,6 +2264,7 @@ def define_cost(class_name, cost_type): define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') +define_cost('CrossEntropyOverBeamCostLayer', 'cross_entropy_over_beam') define_cost('RankingCost', 'rank-cost') define_cost('AucValidation', 'auc-validation') define_cost('PnpairValidation', 'pnpair-validation') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..2b01b6ad4d 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import functools import collections import inspect @@ -104,6 +103,7 @@ __all__ = [ 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'cross_entropy_over_beam', 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', @@ -219,6 +219,7 @@ class LayerType(object): HUBER = 'huber' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' + CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam' SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' SUM_COST = 'sum_cost' @@ -4028,8 +4029,12 @@ def __cost_input__(input, label, weight=None): """ inputs and parents for cost layers. 
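+    ``input`` and ``label`` may each be a single LayerOutput or a list of
+    LayerOutput; single layers are normalized into one-element lists below.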
""" - ipts = [Input(input.name), Input(label.name)] - parents = [input, label] + if isinstance(input, LayerOutput): + input = [input] + if isinstance(label, LayerOutput): + label = [label] + ipts = [Input(ipt.name) for ipt in (input + label)] + parents = [ipt for ipt in (input + label)] if weight is not None: assert weight.size == 1 ipts.append(Input(weight.name)) @@ -5692,6 +5697,29 @@ def multi_binary_label_cross_entropy(input, size=1) +@wrap_name_default() +@layer_support() +def cross_entropy_over_beam(input, label, name=None, coeff=1.0, weight=None): + """ + TODO(caoying) add comments. + """ + + assert len(input) / 2 == len(label), "Error input numbers." + for i in range(0, len(input), 2): + assert (input[i].size == 1), ( + "Inputs for this layer are made up of " + "several pairs and the first one in a pair is scores for " + "all the candidates, so its size should be equal to 1.") + + ipts, parents = __cost_input__(input, label, weight) + Layer( + name=name, + type=LayerType.CROSS_ENTROPY_OVER_BEAM, + inputs=ipts, + coeff=coeff) + return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) + + @wrap_name_default() @layer_support() def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a61beb871a..130e6332a7 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers test_cross_entropy_over_beam) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr new file mode 100644 index 0000000000..e44478ec2b --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -0,0 +1,208 @@ +type: "nn" +layers { + name: "sentence_states" + type: "data" + size: 32 + active_type: "" +} +layers { + name: "sentence_scores" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 32 + active_type: "" + inputs { + input_layer_name: "sentence_states" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__sub_nested_seq_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_1__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 32 + active_type: "" + inputs { + input_layer_name: 
"__sub_nested_seq_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + select_first: true +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__seq_slice_layer_0__" + input_parameter_name: "___fc_layer_1__.w0" + } + bias_parameter_name: "___fc_layer_1__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_2__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_1__" + } + beam_size: 5 +} +layers { + name: "sentences_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "start_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "end_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__cross_entropy_over_beam_0__" + type: "cross_entropy_over_beam" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + inputs { + input_layer_name: "__fc_layer_1__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_2__" + } + inputs { + input_layer_name: "sentences_ids" + } + inputs { + input_layer_name: "start_ids" + } + inputs { + input_layer_name: "end_ids" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_1__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_1__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "sentence_scores" +input_layer_names: "sentence_states" +input_layer_names: "sentences_ids" +input_layer_names: "start_ids" +input_layer_names: "end_ids" +output_layer_names: "__cross_entropy_over_beam_0__" +sub_models { + name: "root" + layer_names: "sentence_states" + layer_names: "sentence_scores" + layer_names: "__kmax_sequence_score_layer_0__" + layer_names: "__sub_nested_seq_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_1__" + layer_names: "__seq_slice_layer_0__" + layer_names: "__fc_layer_1__" + layer_names: "__kmax_sequence_score_layer_2__" + layer_names: "sentences_ids" + layer_names: "start_ids" + layer_names: "end_ids" + layer_names: "__cross_entropy_over_beam_0__" + input_layer_names: "sentence_scores" + input_layer_names: "sentence_states" + input_layer_names: "sentences_ids" + input_layer_names: "start_ids" + input_layer_names: "end_ids" + output_layer_names: "__cross_entropy_over_beam_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py new file mode 100644 index 0000000000..edc2d32fca --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +#coding=utf-8 + +from paddle.trainer_config_helpers import * +beam_size = 5 + +# the first beam expansion. 
+sentence_states = data_layer(name="sentence_states", size=32) +sentence_scores = data_layer(name="sentence_scores", size=1) +topk_sentence_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the second beam expansion. +topk_sen = sub_nested_seq_layer( + input=sentence_states, selected_indices=topk_sentence_ids) +start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation()) +topk_start_pos_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the final beam expansion. +topk_start_spans = seq_slice_layer( + input=topk_sen, starts=topk_start_pos_ids, ends=None) +end_pos_scores = fc_layer( + input=topk_start_spans, size=1, act=LinearActivation()) +topk_end_pos_ids = kmax_sequence_score_layer( + input=end_pos_scores, beam_size=beam_size) + +# define the cost +sentence_idx = data_layer(name="sentences_ids", size=1) +start_idx = data_layer(name="start_ids", size=1) +end_idx = data_layer(name="end_ids", size=1) +cost = cross_entropy_over_beam( + input=[ + sentence_scores, topk_sentence_ids, start_pos_scores, + topk_start_pos_ids, end_pos_scores, topk_end_pos_ids + ], + label=[sentence_idx, start_idx, end_idx]) + +outputs(cost) From 05e8a26b4bb093f9dccb9aeb533a5851aaed09b8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 10:33:28 +0800 Subject: [PATCH 017/170] add unittest. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 35 +++++++ paddle/gserver/layers/CrossEntropyOverBeam.h | 31 ++++++ paddle/gserver/tests/CMakeLists.txt | 6 ++ paddle/gserver/tests/LayerGradUtil.cpp | 25 +++-- paddle/gserver/tests/LayerGradUtil.h | 18 ++++ .../tests/test_CrossEntropyOverBeamGrad.cpp | 94 +++++++++++++++++++ 6 files changed, 201 insertions(+), 8 deletions(-) create mode 100644 paddle/gserver/layers/CrossEntropyOverBeam.cpp create mode 100644 paddle/gserver/layers/CrossEntropyOverBeam.h create mode 100644 paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp new file mode 100644 index 0000000000..8b6223ec6a --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "CrossEntropyOverBeam.h" + +namespace paddle { + +REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); + +bool CrossEntropyOverBeam::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + setNeedSequenceInfo(false); + + return true; +} + +void CrossEntropyOverBeam::forward(PassType passType) {} + +void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h new file mode 100644 index 0000000000..3106f9858b --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "CrossEntropyOverBeam.h" +#include "Layer.h" + +namespace paddle { + +class CrossEntropyOverBeam : public Layer { +public: + explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index c2a2993620..24df7e7220 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -34,6 +34,12 @@ add_unittest_without_exec(test_CRFLayerGrad add_test(NAME test_CRFLayerGrad COMMAND test_CRFLayerGrad) +################ test_CrossEntropyOverBeam #################### +add_unittest_without_exec(test_CrossEntropyOverBeam + test_CrossEntropyOverBeamGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_CrossEntropyOverBeam + COMMAND test_CrossEntropyOverBeam) add_unittest_without_exec(test_ActivationGrad test_ActivationGrad.cpp diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index fd9cfa1dc7..a38880e14c 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -388,14 +388,23 @@ void initDataLayer(TestConfig testConf, data.grad->zeroMem(); break; case INPUT_SELF_DEFINE_DATA: { - size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); - size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); - CHECK_GT(static_cast(height), 0); - CHECK_GT(static_cast(width), 0); - data.value = Matrix::create(height, width, false, useGpu); - data.grad = Matrix::create(height, width, false, useGpu); - data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); - data.grad->zeroMem(); + if (testConf.inputDefs[i].ids.size()) { + data.ids = IVector::create(testConf.inputDefs[i].ids.size(), useGpu); + data.ids->copyFrom(testConf.inputDefs[i].ids.data(), + testConf.inputDefs[i].ids.size()); + } else if (testConf.inputDefs[i].selfDefinedData) { + size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); + size_t 
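+        // INPUT_SELF_DEFINE_DATA carries either integer ids (copied into
+        // Argument::ids above) or a self-defined matrix (handled below).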
width = testConf.inputDefs[i].selfDefinedData->getWidth(); + CHECK_GT(static_cast(height), 0); + CHECK_GT(static_cast(width), 0); + data.value = Matrix::create(height, width, false, useGpu); + data.grad = Matrix::create(height, width, false, useGpu); + data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); + data.grad->zeroMem(); + } else { + LOG(FATAL) << "No self-defined data are given."; + return; + } const std::vector& labelSeqStartPositions = testConf.inputDefs[i].labelSeqStartPositions; diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 5debedf5ef..a35edd2b5e 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -68,6 +68,7 @@ struct InputDef { std::vector labelInitValue; std::vector labelSeqStartPositions; std::vector labelSubSeqStartPositions; + std::vector ids; MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { @@ -95,6 +96,23 @@ struct InputDef { isStatic = false; } + InputDef(InputType type, + string nameIn, + std::vector ids, + std::vector selfDefinedSeqStartPos = {}, + std::vector selfDefinedSubSeqStartPos = {}) + : labelSeqStartPositions(selfDefinedSeqStartPos), + labelSubSeqStartPositions(selfDefinedSubSeqStartPos), + ids(ids) { + selfDefinedData = nullptr; + inputType = type; + name = nameIn; + dim = 0; + sparse = {""}; + paraSize = 0; + isStatic = false; + } + InputDef(InputType type, string nameIn, size_t dimIn, diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp new file mode 100644 index 0000000000..54daba3656 --- /dev/null +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -0,0 +1,94 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +struct SingleBeamExpansion { + vector seqStartPos; + vector subSeqStartPos; + + vector candidateScores; + // TODO(caoying): store this into Argument.ids + vector selectedIndices; + vector groundTruth; +}; + +void genRandomBeamExpansion(size_t expansionCount, + vector& beamExpansions) { + beamExpansions.clear(); +} + +void testCrossEntropyOverBeam() { + const size_t expansionCount = 3; + vector beams; + genRandomBeamExpansion(expansionCount, beams); + + for (size_t i = 0; i < beams.size(); ++i) { + const SingleBeamExpansion& beam = beams[i]; + // create scores for all the candidates + MatrixPtr candidateScorePtr = + Matrix::create(beam.candidateScores.size(), 1, false, false); + candidateScorePtr->copyFrom(candidateScores.data(), candidateScores.size()); + + ostringstream paramName; + paramName << "candidate_scores_" << i; + beam.subSeqStartPos.size() + ? 
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + ostr.str(), + candidateScorePtr, + beam.seqStartPos, + beam.subSeqStartPos}) + : config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + ostr.str(), + candidateScorePtr, + beam.seqStartPos}); + // create indices for the selected candidates + + // create the ground truth + } +} + +TestConfig config; +config.layerConfig.set_type("cross_entropy_over_beam"); + +// testLayerGrad( +// config, "cross_entropy_over_beam", seqNum, false, useGpu, false); +} + +TEST(Layer, CrossEntropyOverBeam) { + for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu); +} + +int main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From e6db484d154c041c1cf6650743bcf27dd2549b77 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 14 Aug 2017 15:51:00 +0800 Subject: [PATCH 018/170] make clear that current huber_cost is for two-classification --- paddle/gserver/layers/CostLayer.cpp | 29 ++++++++++--------- paddle/gserver/layers/CostLayer.h | 18 +++++------- paddle/gserver/tests/test_LayerGrad.cpp | 2 +- python/paddle/trainer/config_parser.py | 2 +- .../paddle/trainer_config_helpers/layers.py | 27 ++++++++++++----- .../protostr/test_cost_layers.protostr | 10 +++---- .../tests/configs/test_cost_layers.py | 2 +- 7 files changed, 50 insertions(+), 40 deletions(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 6bfdea3c6e..138c86a6d6 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -575,10 +575,10 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output, // // Huber loss for robust 2-classes classification // -REGISTER_LAYER(huber, HuberTwoClass); +REGISTER_LAYER(huber, HuberTwoClassification); -bool HuberTwoClass::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { +bool HuberTwoClassification::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { CostLayer::init(layerMap, parameterMap); if (useGpu_) { tmpCpuInput_.reserve(inputLayers_.size()); @@ -589,7 +589,9 @@ bool HuberTwoClass::init(const LayerMap& layerMap, return true; } -void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) { +void HuberTwoClassification::forwardImp(Matrix& output, + Argument& label, + Matrix& cost) { if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { tmpCpuInput_[i].resizeAndCopyFrom( @@ -600,10 +602,11 @@ void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) { forwardImpIn(output, label, cost); } -void HuberTwoClass::forwardImpIn(Matrix& output, - Argument& label, - Matrix& target) { +void HuberTwoClassification::forwardImpIn(Matrix& output, + Argument& label, + Matrix& target) { size_t numSamples = target.getHeight(); + CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); CHECK_EQ(output.getHeight(), numSamples); CHECK_EQ(output.getWidth(), (size_t)1); @@ -624,9 +627,9 @@ void HuberTwoClass::forwardImpIn(Matrix& output, target.copyFrom(cost.data(), numSamples); } -void HuberTwoClass::backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) { +void HuberTwoClassification::backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) { if (useGpu_) { backwardImpIn( *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad); @@ -636,9 +639,9 @@ void HuberTwoClass::backwardImp(Matrix& 
outputValue, } } -void HuberTwoClass::backwardImpIn(Matrix& output, - Argument& label, - Matrix& outputG) { +void HuberTwoClassification::backwardImpIn(Matrix& output, + Argument& label, + Matrix& outputG) { size_t numSamples = output.getHeight(); real* out = output.getData(); real* grad = outputG.getData(); diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 14c0b33ec1..77427b7a08 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -307,21 +307,17 @@ public: /** * Huber loss for robust 2-classes classification. * - * For label={0, 1}, let y=2*label-1. Given output f, the loss is: - * \f[ - * Loss = - * \left\{\begin{matrix} - * 4 * y * f & \textit{if} \ \ y* f < -1 \\ - * (1 - y * f)^2 & \textit{if} \ \ -1 < y * f < 1 \\ - * 0 & \textit{otherwise} - * \end{matrix}\right. - * \f] + * For label={0, 1}, let y=2*label-1. Given output f(x), the loss is: + * Loss = 4 * y * f, if y* f < -1 \\ + * Loss = (1 - y * f)^2, if -1 < y * f < 1 \\ + * Loss = 0, otherwise */ -class HuberTwoClass : public CostLayer { +class HuberTwoClassification : public CostLayer { std::vector tmpCpuInput_; public: - explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {} + explicit HuberTwoClassification(const LayerConfig& config) + : CostLayer(config) {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..6d60250f6d 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -830,7 +830,7 @@ TEST(Layer, square_error_weighted) { TEST(Layer, huber_two_class) { TestConfig config; - config.layerConfig.set_type("huber"); + config.layerConfig.set_type("huber_classification"); config.biasSize = 0; config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da99e5bd53..248da9417f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2255,7 +2255,7 @@ define_cost('PnpairValidation', 'pnpair-validation') define_cost('SumOfSquaresCostLayer', 'square_error') define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy') define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy') -define_cost('HuberTwoClass', 'huber') +define_cost('HuberTwoClassification', 'huber_classification') define_cost('SumCost', 'sum_cost') define_cost('SmoothL1Cost', 'smooth_l1') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..20d96efe15 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -108,7 +108,7 @@ __all__ = [ 'sum_cost', 'rank_cost', 'lambda_cost', - 'huber_cost', + 'huber_classification_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', @@ -216,7 +216,7 @@ class LayerType(object): RANK_COST = 'rank-cost' LAMBDA_COST = 'lambda_cost' - HUBER = 'huber' + HUBER_CLASSIFICATION = 'huber_classification' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' @@ -5605,16 +5605,26 @@ def sum_cost(input, name=None, layer_attr=None): @wrap_name_default() @layer_support() -def huber_cost(input, label, name=None, coeff=1.0, 
layer_attr=None): +def huber_classification_cost(input, + label, + name=None, + coeff=1.0, + layer_attr=None): """ - A loss layer for huber loss. + For classification purposes, a variant of the Huber loss called modified Huber + is sometimes used. Given a prediction f(x) (a real-valued classifier score) and + a true binary class label :math:`y\in \left \{-1, 1 \right \}`, the modified Huber + loss is defined as: + + .. math: + loss = \max \left ( 0, 1-yf(x) \right )^2, yf(x)\geq 1 + loss = -4yf(x), \text{otherwise} The example usage is: .. code-block:: python - cost = huber_cost(input=input_layer, - label=label_layer) + cost = huber_classification_cost(input=input_layer, label=label_layer) :param input: The first input layer. :type input: LayerOutput. @@ -5634,11 +5644,12 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): assert input.size == 1 Layer( name=name, - type=LayerType.HUBER, + type=LayerType.HUBER_CLASSIFICATION, inputs=[input.name, label.name], coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1) + return LayerOutput( + name, LayerType.HUBER_CLASSIFICATION, parents=[input, label], size=1) @wrap_name_default() diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index 05847344be..a64e5ea0dd 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -180,8 +180,8 @@ layers { active_type: "" } layers { - name: "__huber_cost_0__" - type: "huber" + name: "__huber_classification_cost_0__" + type: "huber_classification" size: 1 active_type: "" inputs { @@ -300,7 +300,7 @@ output_layer_names: "__rank_cost_0__" output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" -output_layer_names: "__huber_cost_0__" +output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" output_layer_names: "__nce_layer_0__" @@ -326,7 +326,7 @@ sub_models { layer_names: "__cross_entropy_with_selfnorm_0__" layer_names: "huber_probs" layer_names: "huber_label" - layer_names: "__huber_cost_0__" + layer_names: "__huber_classification_cost_0__" layer_names: "__multi_binary_label_cross_entropy_0__" layer_names: "__sum_cost_0__" layer_names: "__nce_layer_0__" @@ -349,7 +349,7 @@ sub_models { output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" - output_layer_names: "__huber_cost_0__" + output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" output_layer_names: "__nce_layer_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py index d2a3b702a1..98bf026d60 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py @@ -33,7 +33,7 @@ outputs( input=probs, label=xe_label), cross_entropy_with_selfnorm( input=probs, label=xe_label), - huber_cost( + huber_classification_cost( input=data_layer( 
name='huber_probs', size=1), label=data_layer( From af1eb31afc92ae3ac59869a6a5b0e890e009c44b Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 11:55:56 -0700 Subject: [PATCH 019/170] add as an operator --- paddle/operators/CMakeLists.txt | 2 ++ paddle/operators/gather_op.cc | 64 +++++++++++++++++++++++++++++++++ paddle/operators/gather_op.h | 52 +++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 paddle/operators/gather_op.cc create mode 100644 paddle/operators/gather_op.h diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a7c89787e4..5ac898a8d3 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -43,6 +43,8 @@ endfunction() add_subdirectory(math) cc_test(gather_test SRCS gather_test.cc DEPS tensor) +cc_library(gather_op SRCS gather_op.cc DEPS op_registry) +# cc_test(gather_op_test SRCS gather_op_test.cc DEPS gather_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc new file mode 100644 index 0000000000..1008a57a87 --- /dev/null +++ b/paddle/operators/gather_op.cc @@ -0,0 +1,64 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/gather_op.h" +#include "paddle/framework/ddim.h" + +namespace paddle { +namespace operators { + +class GatherOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE(ctx.InputSize() == 2, ""); + PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); + int batch_size = ctx.Input(1)->dims()[0]; + PADDLE_ENFORCE(batch_size > 0); + } +}; + +class GatherOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GatherOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The source input of gather op"); + AddInput("Index", "The index input of gather op"); + AddOutput("Y", "The output of add op"); + AddComment(R"DOC( +Gather Operator by selecting from the first axis, + +Y = X[Index] +)DOC"); + } +}; + +class GatherGradOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + // ctx.Output("X" + framework::kGradVarSuffix) + // ->Resize(ctx.Input("X")->dims()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker); +REGISTER_OP_CPU_KERNEL(gather, + ops::GatherOpKernel); +REGISTER_GRADIENT_OP(gather, gather_grad, ops::GatherGradOp); +REGISTER_OP_CPU_KERNEL( + gather_grad, + ops::GatherGradientOpKernel); diff --git a/paddle/operators/gather_op.h b/paddle/operators/gather_op.h new file mode 100644 index 0000000000..13e4c9b058 --- /dev/null +++ b/paddle/operators/gather_op.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "gather.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "scatter.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class GatherOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto X = ctx.Input("X"); + auto Index = ctx.Input("Index"); + auto Y = ctx.Output("Y"); + + Y->mutable_data(ctx.GetPlace()); + Gather(ctx.GetPlace(), X, Index, Y); + } +}; + +template +class GatherGradientOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto Index = ctx.Input("Index"); + auto dX = ctx.Output(framework::GradVarName("X")); + auto dY = ctx.Input(framework::GradVarName("Y")); + + ScatterUpdate(ctx.GetPlace(), dY, Index, dX); + } +}; + +} // namespace operators +} // namespace paddle From caaa5f86b91beda67daf8ae295cf99fa4dce12ba Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 15:09:04 -0700 Subject: [PATCH 020/170] gather op added --- paddle/framework/CMakeLists.txt | 2 ++ paddle/framework/empty_test.cc | 56 +++++++++++++++++++++++++++++++++ paddle/operators/gather_op.cc | 2 ++ 3 files changed, 60 insertions(+) create mode 100644 paddle/framework/empty_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 0398526024..9e306c8650 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -10,6 +10,8 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) +cc_test(empty_test SRCS empty_test.cc DEPS tensor) + cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) diff --git a/paddle/framework/empty_test.cc b/paddle/framework/empty_test.cc new file mode 100644 index 0000000000..2237f8ce0e --- /dev/null +++ b/paddle/framework/empty_test.cc @@ -0,0 +1,56 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include +#include +#include "paddle/framework/tensor.h" + +TEST(Empty, Dims) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor tt; + tt.Resize(make_ddim({0, 3, 4})); + DDim dims = tt.dims(); + ASSERT_EQ(arity(dims), 3); + EXPECT_EQ(0, dims[0]); + EXPECT_EQ(3, dims[1]); + EXPECT_EQ(4, dims[2]); +} + +TEST(Empty, MutableData) { + using namespace paddle::framework; + using namespace paddle::platform; + { + Tensor src_tensor; + float* p1 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), CPUPlace()); + EXPECT_NE(p1, nullptr); + } + +#ifndef PADDLE_ONLY_CPU + { + Tensor src_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set src_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = src_tensor.mutable_data(make_ddim({0, 4}), GPUPlace()); + EXPECT_NE(p2, nullptr); + // EXPECT_NE(p1, p2); + } +#endif +} diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 1008a57a87..3414a3c263 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -23,6 +23,8 @@ class GatherOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2, ""); PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), + "Inputs of GatherOp must all be set"); int batch_size = ctx.Input(1)->dims()[0]; PADDLE_ENFORCE(batch_size > 0); } From 2a42a73db1ff32f7d1b9dfc772070fc7827aa4e7 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 15:10:59 -0700 Subject: [PATCH 021/170] modify gather_op with test --- paddle/operators/gather_op.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 3414a3c263..5a4f889f3d 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -27,6 +27,9 @@ class GatherOp : public framework::OperatorWithKernel { "Inputs of GatherOp must all be set"); int batch_size = ctx.Input(1)->dims()[0]; PADDLE_ENFORCE(batch_size > 0); + paddle::framework::DDim output_dims(ctx.Input(0)->dims()); + output_dims[0] = batch_size; + ctx.Output(0)->Resize(output_dims); } }; @@ -48,8 +51,8 @@ Y = X[Index] class GatherGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - // ctx.Output("X" + framework::kGradVarSuffix) - // ->Resize(ctx.Input("X")->dims()); + ctx.Output("X" + framework::kGradVarSuffix) + ->Resize(ctx.Input("X")->dims()); } }; From f6bffd4e1ff506319fa1a3338038d61d3f653181 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 11 Aug 2017 15:40:23 -0700 Subject: [PATCH 022/170] gather_op modified --- paddle/operators/gather_op.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 5a4f889f3d..05ba52ce06 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -51,8 +51,10 @@ Y = X[Index] class GatherGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output("X" + framework::kGradVarSuffix) - ->Resize(ctx.Input("X")->dims()); + auto X_grad = ctx.Output(framework::GradVarName("X")); + auto X = ctx.Input("X"); + + 
X_grad->Resize(X->dims()); } }; From 323d4233f3cb0f72ddac36977941e84880a7eedc Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 15 Aug 2017 23:50:56 +0000 Subject: [PATCH 023/170] gather op added with python unittest --- paddle/operators/gather_op.cu | 20 ++++++++++++++++ .../v2/framework/tests/test_gather_op.py | 23 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 paddle/operators/gather_op.cu create mode 100644 python/paddle/v2/framework/tests/test_gather_op.py diff --git a/paddle/operators/gather_op.cu b/paddle/operators/gather_op.cu new file mode 100644 index 0000000000..3f04a7b3f8 --- /dev/null +++ b/paddle/operators/gather_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/gather_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gather, + ops::GatherOpKernel); diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py new file mode 100644 index 0000000000..2ffbf17236 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -0,0 +1,23 @@ +import unittest + +import numpy +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator + +from op_test_util import OpTestMeta + + +class TestGatherOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "gather" + self.inputs = { + 'X': numpy.random.random((10, 20)).astype("float32"), + 'Index': numpy.array([1, 3, 5]).astype("int") + } + self.outputs = {'Y': self.input['X'][self.input['Index']]} + + +if __name__ == "__main__": + unittest.main() From 4d2adab772e3c0789e9696533da61ee3583363d1 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 15 Aug 2017 23:54:16 +0000 Subject: [PATCH 024/170] gather op added with python unittest --- paddle/framework/CMakeLists.txt | 1 + paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 3 +- paddle/operators/gather_op.cc | 43 +++++++++++-------- .../paddle/v2/framework/tests/CMakeLists.txt | 1 + 5 files changed, 29 insertions(+), 20 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 9e306c8650..30313780a3 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -47,6 +47,7 @@ cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward sgd_op + gather_op add_op mul_op rowwise_add_op diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index fe0c87bc57..90311e0dc3 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,6 +42,7 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); +USE_CPU_ONLY_OP(gather); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 5ac898a8d3..6849e39cb7 100644 --- a/paddle/operators/CMakeLists.txt +++ 
b/paddle/operators/CMakeLists.txt @@ -43,7 +43,8 @@ endfunction() add_subdirectory(math) cc_test(gather_test SRCS gather_test.cc DEPS tensor) -cc_library(gather_op SRCS gather_op.cc DEPS op_registry) +op_library(gather_op SRCS gather_op.cc gather_op.cu) +# DEPS op_registry) # cc_test(gather_op_test SRCS gather_op_test.cc DEPS gather_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 05ba52ce06..2e08ba8dcc 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -19,17 +19,33 @@ namespace paddle { namespace operators { class GatherOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2, ""); - PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - "Inputs of GatherOp must all be set"); - int batch_size = ctx.Input(1)->dims()[0]; + // PADDLE_ENFORCE(ctx.InputSize() == 2, ""); + // PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); + // PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), + // "Inputs of GatherOp must all be set"); + int batch_size = ctx.Input("Index")->dims()[0]; PADDLE_ENFORCE(batch_size > 0); paddle::framework::DDim output_dims(ctx.Input(0)->dims()); output_dims[0] = batch_size; - ctx.Output(0)->Resize(output_dims); + ctx.Output("Y")->Resize(output_dims); + } +}; + +class GatherGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto X_grad = ctx.Output(framework::GradVarName("X")); + auto X = ctx.Input("X"); + + X_grad->Resize(X->dims()); } }; @@ -47,25 +63,14 @@ Y = X[Index] )DOC"); } }; - -class GatherGradOp : public framework::OperatorWithKernel { - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto X_grad = ctx.Output(framework::GradVarName("X")); - auto X = ctx.Input("X"); - - X_grad->Resize(X->dims()); - } -}; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker); +REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker, gather_grad, + ops::GatherGradOp); REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel); -REGISTER_GRADIENT_OP(gather, gather_grad, ops::GatherGradOp); REGISTER_OP_CPU_KERNEL( gather_grad, ops::GatherGradientOpKernel); diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 96fad9b42e..1032743a13 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -13,6 +13,7 @@ py_test(test_add_two_op SRCS test_add_two_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) +py_test(test_gather_op SRCS test_gather_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) From a037b099f7f4bf8370e882f397bd4c691b0e0986 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 15:49:48 +0800 Subject: [PATCH 025/170] finish unittest. 
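
The test drives the layer with randomly generated beam expansions: for every sequence it records whether the gold candidate is still inside the beam selected at each expansion, and once the gold candidate is pruned the remaining expansions carry a dummy label that is excluded from the cost. A minimal sketch of that membership check (plain Python, hypothetical names, independent of the layer's real API):

    # Does the gold candidate survive this expansion?
    # "selected" holds the kept candidate indices per row; -1 marks padding.
    def gold_in_beam(selected, beam_size, row_idx, gold):
        start = row_idx * beam_size
        beam = [int(i) for i in selected[start:start + beam_size] if i != -1]
        return gold in beam

    selected = [2, 5, 7, -1, 0, 3, 4, 6]        # two rows, beam_size = 4
    assert gold_in_beam(selected, 4, 0, 5)      # gold kept in row 0
    assert not gold_in_beam(selected, 4, 1, 1)  # gold pruned in row 1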
--- .../gserver/layers/CrossEntropyOverBeam.cpp | 1 + .../tests/test_CrossEntropyOverBeamGrad.cpp | 218 +++++++++++++++--- 2 files changed, 191 insertions(+), 28 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 8b6223ec6a..88d80aa83a 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -22,6 +22,7 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); + CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; setNeedSequenceInfo(false); diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index e9ecebcfe5..a5f06c15dc 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include @@ -27,6 +28,10 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); +const size_t MAX_SEQ_NUM = 10; +const size_t MAX_SEQ_LEN = 27; +const size_t MAX_BEAM_SIZE = 10; + struct SingleBeamExpansion { vector seqStartPos; vector subSeqStartPos; @@ -34,37 +39,195 @@ struct SingleBeamExpansion { // TODO(caoying): store this into Argument.ids vector selectedIndices; + vector groundTruth; - vector labelSeqStartPos; + vector inBeam; + vector rowIdxInBeam; }; -void genCandidateScores(bool hasSubSeq, - vector& scores, +void genRand(real* numbers, size_t n) { + default_random_engine generator; + uniform_real_distribution distribution(0.0, 1.0); + for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); +} + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genCandidateScores(bool hasSubseq, + size_t beamSize, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + vector& seqStartPos = curBeam.seqStartPos; + seqStartPos.resize(1, 0); + vector& subSeqStartPos = curBeam.subSeqStartPos; + subSeqStartPos.resize(1, 0); + + srand((size_t)(time(NULL))); + // srand(1); + if (prevBeam.selectedIndices.size()) { + if (prevBeam.subSeqStartPos.size() > 1) { + int seqIdx = 1; + // samples in previous beam are nested sequences. + for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { + for (size_t j = 0; j < beamSize; ++j) { + if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; + for (size_t k = 0; k < beamSize; ++k) + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); + } + if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { + seqStartPos.push_back(subSeqStartPos.back()); + seqIdx++; + } + } + } else { + // samples in previous beam are sequences. + for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { + if (i && i % beamSize == 0) { + seqStartPos.push_back(subSeqStartPos.back()); + if (i == prevBeam.selectedIndices.size()) break; + } + if (prevBeam.selectedIndices[i] == -1.) 
continue; + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } else { + // the first beam expansion + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + if (hasSubseq) { + for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } else { + seqStartPos.push_back(seqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } + + size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back(); + curBeam.candidateScores.resize(totalSeqNum, 0.); + genRand(curBeam.candidateScores.data(), totalSeqNum); +} + +void genSelectedIndices(size_t beamSize, vector& seqStartPos, - vector& subSeqStartPos) {} - -void genSelectedIndicesAndGroundtruth(size_t beamSize, - vector& seqStartPos, - vector& selectedIndices) {} - -SingleBeamExpansion genOneBeam(size_t beamSize, bool hasSubSeq) { - SingleBeamExpansion beam; - genCandidateScores( - hasSubSeq, beam.candidateScores, beam.seqStartPos, beam.subSeqStartPos); - genSelectedIndicesAndGroundtruth( - beamSize, - hasSubSeq ? beam.subSeqStartPos : beam.seqStartPos, - beam.selectedIndices); - return beam; + vector& selectedIndices) { + size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1); + selectedIndices.resize(selectedIdsCount, -1.); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + int n = min(seqLen, static_cast(beamSize)); + vector ids = randSampling(seqLen, n); + memcpy(selectedIndices.data() + i * beamSize, + ids.data(), + sizeof(real) * ids.size()); + } +} + +void genGroundTruth(vector& beamExpansions, + size_t beamSize) { + size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; + for (size_t i = 2; i < beamExpansions.size(); ++i) + CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); + + // srand(1); + srand((size_t)(time(NULL))); + + // initialize the first beam. + SingleBeamExpansion& beam = beamExpansions[1]; + beam.groundTruth.resize(seqNum, 0); + beam.inBeam.resize(seqNum, 0); + beam.rowIdxInBeam.resize(seqNum, -1); + + auto begPos = beam.selectedIndices.begin(); + for (size_t i = 0; i < seqNum; ++i) { + int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; + int label = rand() % seqLen; + auto endPos = begPos + beamSize; + beam.groundTruth[i] = label; + if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1; + begPos = endPos; + beam.rowIdxInBeam[i] = i; + } + + // iterate over each beam expansions + for (size_t i = 2; i < beamExpansions.size(); ++i) { + SingleBeamExpansion& curBeam = beamExpansions[i]; + SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; + + curBeam.groundTruth.resize(seqNum, 0); + curBeam.inBeam.resize(seqNum, 0); + curBeam.rowIdxInBeam.resize(seqNum, -1); + + // iterate over each sequence + for (size_t j = 0; j < seqNum; ++j) { + if (prevBeam.inBeam[j]) { + // gold sequence falls in the beam in previous search. 
+ + auto begPos = prevBeam.selectedIndices.begin(); + auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize; + size_t totalExpansion = + prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.); + curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j]; + + CHECK_LE(curBeam.rowIdxInBeam[j] + 1, + curBeam.subSeqStartPos.size() - 1); + int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; + int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1]; + CHECK_GT(size_t(end), size_t(start)); + int label = rand() % (end - start); + + curBeam.groundTruth[j] = label; + auto findBeg = curBeam.selectedIndices.begin() + + curBeam.rowIdxInBeam[j] * beamSize; + auto findEnd = findBeg + beamSize; + if (find(findBeg, findEnd, real(label)) != findEnd) + curBeam.inBeam[j] = 1; + } else { + // in previous search, gold sequence has fallen off the beam, + // the beam search stops, here use -1 as a dummy label. + // It will not used in calculation the cost. + beamExpansions[i].groundTruth[j] = -1; + } + } + } +} + +void genOneBeam(size_t beamSize, + bool hasSubseq, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam); + genSelectedIndices(beamSize, + hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos, + curBeam.selectedIndices); } void genRandomBeamExpansion(size_t expansionCount, size_t beamSize, vector& beamExpansions) { beamExpansions.clear(); - for (size_t i = 0; i < expansionCount; ++i) { - beamExpansions.emplace_back(genOneBeam(beamSize, i)); - } + beamExpansions.resize(expansionCount + 1); + + // beamExpansions[0] is reserved. + for (size_t i = 1; i <= expansionCount; ++i) + genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]); + genGroundTruth(beamExpansions, beamSize); } void testCrossEntropyOverBeam(bool useGpu) { @@ -72,12 +235,12 @@ void testCrossEntropyOverBeam(bool useGpu) { config.layerConfig.set_type("cross_entropy_over_beam"); const size_t expansionCount = 3; - const size_t beamSize = 3; + const size_t beamSize = MAX_BEAM_SIZE; vector beams; genRandomBeamExpansion(expansionCount, beamSize, beams); size_t seqNum = 0; - for (size_t i = 0; i < beams.size(); ++i) { + for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; // create scores for all the candidates MatrixPtr candidateScorePtr = @@ -88,7 +251,7 @@ void testCrossEntropyOverBeam(bool useGpu) { ostringstream paramName; paramName << "candidate_scores_" << i; - if (beam.subSeqStartPos.size()) { + if (beam.subSeqStartPos.size() > 1) { seqNum = beam.subSeqStartPos.size() - 1; config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, paramName.str(), @@ -118,10 +281,9 @@ void testCrossEntropyOverBeam(bool useGpu) { // create the ground truth paramName.clear(); paramName << "label_" << i; - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - paramName.str(), - beam.groundTruth, - beam.labelSeqStartPos}); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth}); + config.layerConfig.add_inputs(); } testLayerGrad( From 6075928d5531b5eecff0d3183c1d47ab3b0962d4 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Wed, 16 Aug 2017 19:02:29 +0000 Subject: [PATCH 026/170] gather op added --- paddle/operators/gather.h | 2 ++ paddle/operators/gather_op.cc | 8 ++------ python/paddle/v2/framework/tests/test_gather_op.py | 7 ++++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 
d6e6990394..3f299ea1a6 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -17,6 +17,8 @@ limitations under the License. */ #include #include "paddle/framework/ddim.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 2e08ba8dcc..499def05a7 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -24,13 +24,9 @@ class GatherOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - // PADDLE_ENFORCE(ctx.InputSize() == 2, ""); - // PADDLE_ENFORCE(ctx.OutputSize() == 1, ""); - // PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - // "Inputs of GatherOp must all be set"); int batch_size = ctx.Input("Index")->dims()[0]; - PADDLE_ENFORCE(batch_size > 0); - paddle::framework::DDim output_dims(ctx.Input(0)->dims()); + PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0"); + paddle::framework::DDim output_dims(ctx.Input("X")->dims()); output_dims[0] = batch_size; ctx.Output("Y")->Resize(output_dims); } diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py index 2ffbf17236..049054d07b 100644 --- a/python/paddle/v2/framework/tests/test_gather_op.py +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -12,11 +12,12 @@ class TestGatherOp(unittest.TestCase): def setUp(self): self.type = "gather" + xnp = numpy.random.random((10, 20)).astype("float32") self.inputs = { - 'X': numpy.random.random((10, 20)).astype("float32"), - 'Index': numpy.array([1, 3, 5]).astype("int") + 'X': xnp, + 'Index': numpy.array([1, 3, 5]).astype("int32") } - self.outputs = {'Y': self.input['X'][self.input['Index']]} + self.outputs = {'Y': self.inputs['X'][self.inputs['Index']]} if __name__ == "__main__": From 02299813685a7172d9e9182631b71473b492c904 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Wed, 16 Aug 2017 23:54:38 +0000 Subject: [PATCH 027/170] remove empty test --- paddle/framework/CMakeLists.txt | 2 -- paddle/framework/empty_test.cc | 56 --------------------------------- 2 files changed, 58 deletions(-) delete mode 100644 paddle/framework/empty_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68b5cec2c5..2cdf323c53 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -10,8 +10,6 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) -cc_test(empty_test SRCS empty_test.cc DEPS tensor) - cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) diff --git a/paddle/framework/empty_test.cc b/paddle/framework/empty_test.cc deleted file mode 100644 index 2237f8ce0e..0000000000 --- a/paddle/framework/empty_test.cc +++ /dev/null @@ -1,56 +0,0 @@ -/* - Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include -#include -#include "paddle/framework/tensor.h" - -TEST(Empty, Dims) { - using namespace paddle::framework; - using namespace paddle::platform; - Tensor tt; - tt.Resize(make_ddim({0, 3, 4})); - DDim dims = tt.dims(); - ASSERT_EQ(arity(dims), 3); - EXPECT_EQ(0, dims[0]); - EXPECT_EQ(3, dims[1]); - EXPECT_EQ(4, dims[2]); -} - -TEST(Empty, MutableData) { - using namespace paddle::framework; - using namespace paddle::platform; - { - Tensor src_tensor; - float* p1 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), CPUPlace()); - EXPECT_NE(p1, nullptr); - } - -#ifndef PADDLE_ONLY_CPU - { - Tensor src_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({0, 2, 3}), GPUPlace()); - EXPECT_NE(p1, nullptr); - // set src_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({0, 4}), GPUPlace()); - EXPECT_NE(p2, nullptr); - // EXPECT_NE(p1, p2); - } -#endif -} From 27a99bfb1446171969da0219a6125a79c39eb582 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 17 Aug 2017 18:10:37 +0800 Subject: [PATCH 028/170] Add base class for huber_regression_cost and huber_classification_cost --- doc/api/v2/config/layer.rst | 6 +-- paddle/gserver/layers/CostLayer.cpp | 55 ++++++++++++---------------- paddle/gserver/layers/CostLayer.h | 27 ++++++++++---- python/paddle/v2/tests/test_layer.py | 2 +- 4 files changed, 46 insertions(+), 44 deletions(-) diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index cb330ea5e1..22a6b2ab84 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -409,9 +409,9 @@ multi_binary_label_cross_entropy_cost .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost :noindex: -huber_cost ----------- -.. autoclass:: paddle.v2.layer.huber_cost +huber_classification_cost +------------------------- +.. 
autoclass:: paddle.v2.layer.huber_classification_cost :noindex: lambda_cost diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 138c86a6d6..69cf393225 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output, } } -// -// Huber loss for robust 2-classes classification -// -REGISTER_LAYER(huber, HuberTwoClassification); - -bool HuberTwoClassification::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { +bool HuberCost::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { CostLayer::init(layerMap, parameterMap); if (useGpu_) { tmpCpuInput_.reserve(inputLayers_.size()); @@ -589,9 +584,7 @@ bool HuberTwoClassification::init(const LayerMap& layerMap, return true; } -void HuberTwoClassification::forwardImp(Matrix& output, - Argument& label, - Matrix& cost) { +void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) { if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { tmpCpuInput_[i].resizeAndCopyFrom( @@ -599,12 +592,22 @@ void HuberTwoClassification::forwardImp(Matrix& output, } hl_stream_synchronize(HPPL_STREAM_DEFAULT); } - forwardImpIn(output, label, cost); } -void HuberTwoClassification::forwardImpIn(Matrix& output, - Argument& label, - Matrix& target) { +// +// Huber loss for robust 2-classes classification +// +REGISTER_LAYER(huber_classification, HuberTwoClassification); + +bool HuberTwoClassification::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + return HuberCost::init(layerMap, parameterMap); +} + +void HuberTwoClassification::forwardImp(Matrix& output, + Argument& label, + Matrix& target) { + HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); @@ -627,25 +630,13 @@ void HuberTwoClassification::forwardImpIn(Matrix& output, target.copyFrom(cost.data(), numSamples); } -void HuberTwoClassification::backwardImp(Matrix& outputValue, +void HuberTwoClassification::backwardImp(Matrix& output, Argument& label, - Matrix& outputGrad) { - if (useGpu_) { - backwardImpIn( - *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad); - outputGrad.copyFrom(*tmpCpuInput_[0].grad); - } else { - backwardImpIn(outputValue, label, outputGrad); - } -} - -void HuberTwoClassification::backwardImpIn(Matrix& output, - Argument& label, - Matrix& outputG) { + Matrix& outputG) { size_t numSamples = output.getHeight(); - real* out = output.getData(); - real* grad = outputG.getData(); - int* lbl = (*label.ids).getData(); + real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); + int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); + real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; if (y * out[i] < -1) @@ -653,8 +644,8 @@ void HuberTwoClassification::backwardImpIn(Matrix& output, else if (y * out[i] < 1) grad[i] += -2 * (1 - y * out[i]) * y; } + if (useGpu_) outputG.copyFrom(grad, numSamples); } - /** * This cost layer compute the sum of its input as loss. 
* \f[ diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 77427b7a08..c006dc8110 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -304,6 +304,23 @@ public: Matrix& outputGrad) override; }; +/* + * A base layer for HuberRegressionLoss and HuberTwoClassification. + */ +class HuberCost : public CostLayer { +public: + std::vector tmpCpuInput_; + + explicit HuberCost(const LayerConfig& config) : CostLayer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; + + void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} +}; + /** * Huber loss for robust 2-classes classification. * @@ -312,25 +329,19 @@ public: * Loss = (1 - y * f)^2, if -1 < y * f < 1 \\ * Loss = 0, otherwise */ -class HuberTwoClassification : public CostLayer { - std::vector tmpCpuInput_; - +class HuberTwoClassification : public HuberCost { public: explicit HuberTwoClassification(const LayerConfig& config) - : CostLayer(config) {} + : HuberCost(config) {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void forwardImpIn(Matrix& output, Argument& label, Matrix& cost); - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) override; - - void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad); }; typedef std::shared_ptr CostLayerPtr; diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index f2097e195f..7373a55ce6 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -141,7 +141,7 @@ class CostLayerTest(unittest.TestCase): cost8 = layer.rank_cost(left=score, right=score, label=score) cost9 = layer.lambda_cost(input=inference, score=score) cost10 = layer.sum_cost(input=inference) - cost11 = layer.huber_cost(input=score, label=label) + cost11 = layer.huber_classification_cost(input=score, label=label) print layer.parse_network([cost1, cost2]) print layer.parse_network([cost3, cost4]) From 4d8992c3bc64a835aa6a1e6e12678594d3f117b5 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 09:58:41 +0800 Subject: [PATCH 029/170] check format before set header format --- paddle/parameter/Parameter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index e31cbc3dee..08a426eb74 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -278,7 +278,11 @@ public: /** * @brief Set the format in header. */ - void setHeaderFormat(int32_t fmt) { headerFormat_ = fmt; } + void setHeaderFormat(int32_t fmt) { + CHECK(isHeaderFormatSupported(fmt)) << "Unsupported format version: " + << fmt; + headerFormat_ = fmt; + } /** * @brief Parameter Update Hook. 
From 462b9b1d20942dca35dbe532248e53cdeccea6b2 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 10:13:06 +0800 Subject: [PATCH 030/170] update mkldnn tag v0.10 --- cmake/external/mkldnn.cmake | 2 +- cmake/external/mklml.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 25c6b4ef52..9686df0021 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -51,7 +51,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS ${MKLDNN_DEPENDS} GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" - GIT_TAG "v0.9" + GIT_TAG "v0.10" PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index e9fd3d4bed..51fafb9479 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -28,7 +28,7 @@ INCLUDE(ExternalProject) SET(MKLML_PROJECT "extern_mklml") SET(MKLML_VER "mklml_lnx_2018.0.20170720") -SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") +SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.10/${MKLML_VER}.tgz") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DST_DIR "mklml") From 46d30ec680f494e4cc30a73330074497da064fbd Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 17 Aug 2017 20:34:02 -0700 Subject: [PATCH 031/170] init minst.py --- python/paddle/v2/framework/tests/mnist.py | 140 ++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 python/paddle/v2/framework/tests/mnist.py diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py new file mode 100644 index 0000000000..32a088ac28 --- /dev/null +++ b/python/paddle/v2/framework/tests/mnist.py @@ -0,0 +1,140 @@ +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator +import numpy + +BATCH_SIZE = 100 + +scope = core.Scope() +place = core.CPUPlace() +dev_ctx = core.DeviceContext.create(place) + +# init_net = core.Net.create() +forward_network = core.Net.create() + +# should be init after forward_op is constructed +# backward_net = core.Operator.backward(forward_net, set()) +backward_net = None +optimize_net = core.Net.create() + + +def atom_id(): + id = 0 + while True: + yield id + id += 1 + + +uniq_id = atom_id().next + + +def data_layer(name, dims): + var = scope.new_var(name) + tensor = var.get_tensor() + tensor.set_dims(dims) # 1 is batch size holder. 
+ return name + + +def feed_data(name, data): + assert isinstance(data, numpy.array) + tensor = scope.find_var(name).get_tensor() + tensor.set_dims(data.shape) + tensor.alloc_float(place) + tensor.set(data, place) + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + +def sgd_optimizer(net, param_name, learning_rate=0.01): + grad_name = grad_var_name(param_name) + optimize_op = Operator( + "sgd", param=param_name, grad=grad_name, learning_rate=learning_rate) + net.add_op(optimize_op) + + +# should use operator and add these to the init_network +def init_param(param_name, dims): + print param_name + var = scope.new_var(param_name) + tensor = var.get_tensor() + tensor.set_dims(dims) + data = numpy.random.uniform( + low=0.0, high=1.0, size=tensor.shape()).astype("float32") + tensor.set(data, place) + + +# fc_layer +def fc_layer(net, input, size, act="sigmoid", bias=True, param=None, name=None): + """ + Add a fc layer to net + + :param input: input variable name. + :type input: str + :param size: fully connected layer size. + :param act: activation name + :param param: parameter attribute, used for initialize parameters. + :param bias: bias attribute. False will not have a bias. + :param name: the name of fc layer. If not set, model will generate a + readable name + :return: output variable name. + """ + if name is None: + name = 'fc_%d' % uniq_id() + if not isinstance(name, str): + raise ValueError("name should be string") + + input_dims = scope.find_var(input).get_tensor().get_dims() + + w_name = param or name + ".w" + init_param(param_name=w_name, dims=[input_dims[1], size]) + sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01) + + pre_activation = name + ".mul.out" + scope.new_var(pre_activation) + mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation) + net.add_op(mul_op) + + # create bias variable if needed + if bias: + bias_name = name + ".b" + init_param(param_name=bias_name, dims=[size]) + sgd_optimizer( + net=optimize_net, param_name=bias_name, learning_rate=0.01) + bias_out = name + ".rowwise_add.out" + scope.new_var(bias_out) + rowwise_add_op = Operator( + "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out) + net.add_op(rowwise_add_op) + pre_activation = bias_out + + activation_op = Operator(act, X=pre_activation, Y=name) + net.add_op(activation_op) + scope.new_var(name) + net.infer_shape(scope) + return name + + +def cross_entropy_layer(net, input, label): + cost_name = 'cross_entropy_%d' % uniq_id() + cross_entropy_op = Operator( + "onehot_cross_entropy", X=input, label=label, Y=cost_name) + net.add_op(cross_entropy_op) + scope.new_var(cost_name) + net.infer_shape(scope) + return cost_name + + +images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) +label = data_layer(name='label', dims=[BATCH_SIZE]) +fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") +cost = cross_entropy_layer(net=forward_network, input=fc, label=label) +forward_network.complete_add_op(True) +print(forward_network) +backward_net = core.Operator.backward(forward_network, set()) + +print(backward_net) + +PASS_NUM = 10 +for pass_id in range(PASS_NUM): + print pass_id From 424b325d084ef0fd5aa61996f35ef88126c48306 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:10:27 +0800 Subject: [PATCH 032/170] add unit test DeConv3D, Conv3D, col2vol, vol2col --- paddle/gserver/tests/test_LayerGrad.cpp | 152 +++++++++++++++++++++++ paddle/math/tests/test_matrixCompare.cpp | 116 +++++++++++++++++ 2 files changed, 268 insertions(+) diff 
--git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..1e80e2c0ee 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -2007,6 +2007,158 @@ TEST(Layer, RowL2NormLayer) { } } +void test3DConvLayer(const string& type, bool trans, bool useGpu) { + // filter size + const int NUM_FILTERS = 6; + // const int CHANNELS = 3; + const int FILTER_SIZE = 3; + const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Z = 3; + + // input image + const int CHANNELS = 3; + const int IMAGE_SIZE = 9; + const int IMAGE_SIZE_Y = 9; + const int IMAGE_SIZE_Z = 9; // 2, 3, 5, 5, 5 + + TestConfig config; + config.biasSize = NUM_FILTERS; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(NUM_FILTERS); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + // Setting up conv3D-trans layer + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + + conv->set_channels(CHANNELS); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_filter_size_z(FILTER_SIZE_Z); + conv->set_padding(0); + conv->set_padding_y(0); + conv->set_padding_z(0); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_stride_z(2); + conv->set_img_size(IMAGE_SIZE); + conv->set_img_size_y(IMAGE_SIZE_Y); + conv->set_img_size_z(IMAGE_SIZE_Z); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + conv->set_output_z(outputSize(conv->img_size_z(), + conv->filter_size_z(), + conv->padding_z(), + conv->stride_z(), + /* caffeMode */ true)); + + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + conv->output_z() * NUM_FILTERS); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + config.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + CHANNELS * IMAGE_SIZE * IMAGE_SIZE_Y * IMAGE_SIZE_Z, + conv->filter_channels() * FILTER_SIZE * FILTER_SIZE_Y * FILTER_SIZE_Z * + NUM_FILTERS}); + + testLayerGrad(config, "conv3D", 10, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "conv3D", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, test3DConvLayer) { + test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ true); +#endif +} + +int deConvOutputSize(int inSize, int kSize, int pad, int stride) { + return (inSize - 1) * stride - 2 * pad + kSize; +} + +void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { + // filter size + const int NUM_FILTERS = 6; + // const int CHANNELS = 3; + const int FILTER_SIZE = 3; + const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Z = 3; + + // input image + const int CHANNELS = 3; + const int IMAGE_SIZE = 4; + const int IMAGE_SIZE_Y = 6; + const int IMAGE_SIZE_Z = 6; + + // Setting up conv-trans layer + TestConfig config; + config.biasSize = NUM_FILTERS; + config.layerConfig.set_type("deconv3d"); + config.layerConfig.set_num_filters(NUM_FILTERS); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + + 
conv->set_channels(CHANNELS); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_filter_size_z(FILTER_SIZE_Z); + conv->set_padding(0); + conv->set_padding_y(0); + conv->set_padding_z(0); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_stride_z(2); + conv->set_img_size(IMAGE_SIZE); + conv->set_img_size_y(IMAGE_SIZE_Y); + conv->set_img_size_z(IMAGE_SIZE_Z); + conv->set_output_x(deConvOutputSize( + conv->img_size(), conv->filter_size(), conv->padding(), conv->stride())); + conv->set_output_y(deConvOutputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y())); + conv->set_output_z(deConvOutputSize(conv->img_size_z(), + conv->filter_size_z(), + conv->padding_z(), + conv->stride_z())); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + conv->output_z() * NUM_FILTERS); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + config.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + CHANNELS * IMAGE_SIZE * IMAGE_SIZE_Y * IMAGE_SIZE_Z, + conv->filter_channels() * FILTER_SIZE * FILTER_SIZE_Y * FILTER_SIZE_Z * + NUM_FILTERS}); + + testLayerGrad(config, "deconv3D", 10, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "deconv3D", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, test3DDeConvLayer) { + test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ true); +#endif +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index d77478f345..1d41ec0870 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1203,4 +1203,120 @@ TEST(Matrix, warpCTC) { } } +int outputSizeCol2Vol( + int imageSize, int filterSize, int padding, int stride, bool caffeMode) { + int outputSize; + if (!caffeMode) { + outputSize = + (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; + } else { + outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; + } + CHECK_GE(outputSize, 1); + return outputSize; +} + +void testMatrixCol2Vol(int depth, int height, int width) { + int channel = 3; + int filterX = 3, filterY = 4, filterZ = 5; + int strideX = 2, strideY = 2, strideZ = 2; + int padX = 1, padY = 1, padZ = 1; + + MatrixPtr cpuImage = + std::make_shared(channel, depth * height * width); + MatrixPtr gpuImage = + std::make_shared(channel, depth * height * width); + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + + int outD = outputSizeCol2Vol(depth, filterZ, padZ, strideZ, true); + int outH = outputSizeCol2Vol(height, filterY, padZ, strideY, true); + int outW = outputSizeCol2Vol(width, filterX, padZ, strideX, true); + + int colBufHeight = channel * filterZ * filterY * filterX; + int colBufWidth = outD * outH * outW; + MatrixPtr cpuColBuf = std::make_shared(colBufHeight, colBufWidth); + MatrixPtr gpuColBuf = std::make_shared(colBufHeight, colBufWidth); + cpuColBuf->vol2Col(cpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX); + gpuColBuf->vol2Col(gpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX); + 
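
testMatrixCol2Vol above sizes the column buffer as (channel * kD * kH * kW) rows by (outD * outH * outW) columns; note that it passes padZ to all three outputSizeCol2Vol calls, which is harmless here only because padX, padY and padZ are all 1. A quick standalone check of those dimensions for the smallest case exercised by TEST(Matrix, col2Vol):

// Illustrative sketch (not from the patch): column-buffer shape for the 9x9x9 case.
#include <cassert>

int outSize(int in, int k, int pad, int stride) {
  // caffeMode branch of outputSizeCol2Vol above
  return (in - k + 2 * pad) / stride + 1;
}

int main() {
  const int channel = 3, depth = 9, height = 9, width = 9;
  const int kZ = 5, kY = 4, kX = 3, stride = 2, pad = 1;
  int outD = outSize(depth, kZ, pad, stride);   // 4
  int outH = outSize(height, kY, pad, stride);  // 4
  int outW = outSize(width, kX, pad, stride);   // 5
  assert(outD == 4 && outH == 4 && outW == 5);
  // colBuf is (channel * kZ * kY * kX) rows by (outD * outH * outW) columns
  assert(channel * kZ * kY * kX == 180);
  assert(outD * outH * outW == 80);
  return 0;
}
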
TensorCheckEqual(*cpuColBuf, *gpuColBuf); + + cpuColBuf->randomizeUniform(); + gpuColBuf->copyFrom(*cpuColBuf); + cpuColBuf->col2Vol(cpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX, + 1.0, + 1.0); + gpuColBuf->col2Vol(gpuImage->getData(), + channel, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + padZ, + padY, + padX, + 1.0, + 1.0); + TensorCheckErr(*cpuImage, *gpuImage); +} + +TEST(Matrix, col2Vol) { + for (auto depth : {9, 16, 64, 128}) { + for (auto height : {9, 11, 73, 128, 256}) { + for (auto width : { + 9, 32, 100, 512, + }) { + VLOG(3) << "depth=" << depth << " height=" << height + << " width=" << width; + testMatrixCol2Vol(depth, height, width); + } + } + } +} +/////// + #endif From c792ef7d5ae470031bebcd990b79c0ce7f36f7bc Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:12:01 +0800 Subject: [PATCH 033/170] fix DeConv3D, Conv3D --- paddle/gserver/layers/Conv3DLayer.cpp | 248 +++++++++++++----------- paddle/gserver/layers/DeConv3DLayer.cpp | 186 +++++++++--------- 2 files changed, 229 insertions(+), 205 deletions(-) diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 0fa9c5f9f5..5609a4cc73 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "Conv3DLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" -#include "Conv3DLayer.h" namespace paddle { @@ -22,32 +22,30 @@ REGISTER_LAYER(conv3d, Conv3DLayer); bool Conv3DLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { - if (!ConvBaseLayer::init(layerMap, parameterMap)) - return false; + if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; int index = 0; for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - M_.push_back(numFilters_ / conf.groups()); - K_.push_back( - conf.filter_channels() * conf.filter_size_z() * \ - conf.filter_size_y() * conf.filter_size()); - weights_[index]->getW()->reshape( - weights_[index]->getW()->getWidth(), - weights_[index]->getW()->getHeight()); + const ConvConfig &conf = inputConfig.conv_conf(); + M_.push_back(numFilters_ / conf.groups()); + K_.push_back(filterPixels_[index] * filterChannels_[index]); + if (nullptr != weights_[index]->getW()) + weights_[index]->getW()->reshape(weights_[index]->getW()->getWidth(), + weights_[index]->getW()->getHeight()); + if (nullptr != weights_[index]->getWGrad()) weights_[index]->getWGrad()->reshape( - weights_[index]->getWGrad()->getWidth(), - weights_[index]->getWGrad()->getHeight()); - ++index; + weights_[index]->getWGrad()->getWidth(), + weights_[index]->getWGrad()->getHeight()); + ++index; } - biases_->getWGrad()->reshape( - biases_->getWGrad()->width_, biases_->getWGrad()->height_); - biases_->getW()->reshape( - biases_->getW()->width_, biases_->getW()->height_); + if (nullptr != biases_->getWGrad()) + biases_->getWGrad()->reshape(biases_->getWGrad()->width_, + biases_->getWGrad()->height_); + if (nullptr != biases_->getW()) + biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } - size_t Conv3DLayer::getSize() { 
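
vol2Col/col2Vol are the 3-D generalization of im2col/col2im, and the round trip tested above is easier to see in one dimension. The following is a purely illustrative 1-D sketch, not the Paddle implementation: im2col copies each kernel-sized window into a column, and col2im adds every column entry back to the pixel it came from, so overlapping windows accumulate.

// Illustrative sketch (not from the patch): a 1-D analogue of vol2Col/col2Vol.
#include <cstdio>
#include <vector>

int main() {
  const int width = 5, k = 3, stride = 1, pad = 0;
  const int outW = (width - k + 2 * pad) / stride + 1;  // 3
  std::vector<float> img = {1, 2, 3, 4, 5};
  std::vector<float> col(k * outW, 0);  // k rows, outW columns, row-major
  // im2col: col(r, c) = img[c * stride - pad + r], zero outside the image
  for (int r = 0; r < k; ++r)
    for (int c = 0; c < outW; ++c) {
      int x = c * stride - pad + r;
      col[r * outW + c] = (x >= 0 && x < width) ? img[x] : 0.f;
    }
  // col2im: every column entry is added back to the position it was read from,
  // which is why overlapping windows accumulate in the result
  std::vector<float> back(width, 0);
  for (int r = 0; r < k; ++r)
    for (int c = 0; c < outW; ++c) {
      int x = c * stride - pad + r;
      if (x >= 0 && x < width) back[x] += col[r * outW + c];
    }
  for (float v : back) std::printf("%g ", v);  // prints: 1 4 9 8 5
  std::printf("\n");
  return 0;
}
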
CHECK_NE(inputLayers_.size(), 0UL); // imgSizeH_.clear(); @@ -59,22 +57,19 @@ size_t Conv3DLayer::getSize() { N_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back(outputSize( - imgSizeW_[i], filterSize_[i], - padding_[i], stride_[i], true)); - outputH_.push_back(outputSize( - imgSizeH_[i], filterSizeY_[i], - paddingY_[i], strideY_[i], true)); - outputD_.push_back(outputSize( - imgSizeD_[i], filterSizeZ_[i], - paddingZ_[i], strideZ_[i], true)); - - N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); - CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); - layerSize += N_[i] * numFilters_; + // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); + outputW_.push_back(outputSize( + imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); + outputH_.push_back(outputSize( + imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); + outputD_.push_back(outputSize( + imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); + + N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); + layerSize += N_[i] * numFilters_; } getOutput().setFrameHeight(outputH_[0]); getOutput().setFrameWidth(outputW_[0]); @@ -88,38 +83,46 @@ void Conv3DLayer::forward(PassType passType) { int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); int outWidth = getSize(); resetOutput(batchSize, outWidth); - const MatrixPtr outMat = getOutputValue(); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - MatrixPtr wMat = weights_[i]->getW(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - - real *outData = outMat->getData() + n * outWidth; - MatrixPtr outMatSub = - Matrix::create(outData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - MatrixPtr in = colBuf_->subMatrix(g * K, K); - MatrixPtr out = outMatSub->subMatrix(g * M, M); - out->mul(*wMatSub, *in, 1.0, 0.0); - } + REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); + const MatrixPtr &inMat = getInputValue(i); + const MatrixPtr &outMat = getOutputValue(); + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wMat = weights_[i]->getW(); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + + real *outData = outMat->getData() + n * 
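
Conv3DLayer::forward above is the usual vol2col plus GEMM formulation. A standalone sketch of the shapes it sets up, assuming groups = 1 and the constants from test3DConvLayer earlier in this series:

// Illustrative sketch (not from the patch): the GEMM shapes behind Conv3DLayer.
#include <cassert>

int main() {
  const int numFilters = 6, channels = 3, groups = 1;
  const int kD = 3, kH = 3, kW = 3;
  const int outD = 4, outH = 4, outW = 4;  // 9^3 image, kernel 3, stride 2, pad 0
  const int filterChannels = channels / groups;

  int M = numFilters / groups;            // rows of each per-group weight sub-matrix
  int K = filterChannels * kD * kH * kW;  // one colBuf column holds one receptive field
  int N = outD * outH * outW;             // one colBuf column per output voxel
  assert(M == 6 && K == 81 && N == 64);

  // per sample and per group: out[M x N] = W[M x K] * colBuf[K x N],
  // and the layer output width is N * numFilters
  assert(N * numFilters == 384);
  return 0;
}
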
outMat->getStride(); + MatrixPtr outMatSub = + Matrix::create(outData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr in = colBuf_->subMatrix(g * K, K); + MatrixPtr out = outMatSub->subMatrix(g * M, M); + out->mul(*wMatSub, *in, 1.0, 1.0); } + } } if (nullptr != this->biasParameter_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); - this->addBias(); + REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); + this->addBias(); } forwardActivation(); } @@ -128,20 +131,20 @@ void Conv3DLayer::backward(const UpdateCallback &callback) { backwardActivation(); if (biases_ && biases_->getWGrad()) { - bpropBiases(); - biases_->getParameterPtr()->incUpdate(callback); + bpropBiases(); + biases_->getParameterPtr()->incUpdate(callback); } for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); - if (weights_[i]->getWGrad()) { - bpropWeights(i); - } - if (this->needGradient_) { - bpropData(i); - } - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weights_[i]->getParameterPtr()->incUpdate(callback); + REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); + if (weights_[i]->getWGrad()) { + bpropWeights(i); + } + if (getInputGrad(i)) { + bpropData(i); + } + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); } } @@ -149,28 +152,36 @@ void Conv3DLayer::bpropWeights(int i) { int M = M_[i]; int N = N_[i]; int K = K_[i]; - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); + const MatrixPtr &inMat = getInputValue(i); Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); MatrixPtr wGradMat = weights_[i]->getWGrad(); - real* outGradData = getOutputGrad()->getData(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - outGradData += n * getOutputGrad()->getWidth(); - MatrixPtr outGradSub = - Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); - MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); - wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); - } + colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + + real *outGradData = + getOutputGrad()->getData() + n * getOutputGrad()->getStride(); + MatrixPtr outGradSub = + Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); + wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); + } } } @@ -180,45 +191,54 @@ void Conv3DLayer::bpropData(int i) { int K = K_[i]; Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); MatrixPtr wMat = weights_[i]->getW(); - real* outGradData = getOutputGrad()->getData(); - real* 
preGradData = getInputGrad(i)->getData(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); for (int n = 0; n < batchSize; ++n) { - outGradData += n * getOutputGrad()->getWidth(); - preGradData += n * getInputGrad(i)->getWidth(); - MatrixPtr outGradSub = - Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); - inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); - } - colBuf_->col2Vol(preGradData, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i], - 1.0, 1.0); + real *outGradData = + getOutputGrad()->getData() + n * getOutputGrad()->getStride(); + real *preGradData = + getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); + MatrixPtr outGradSub = + Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); + inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); + } + colBuf_->col2Vol(preGradData, + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i], + 1.0, + 1.0); } } void Conv3DLayer::bpropBiases() { MatrixPtr outGradMat = getOutputGrad(); if (this->sharedBiases_) { - biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); + biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); } else { - biases_->getWGrad()->collectBias(*outGradMat, 1.0f); + biases_->getWGrad()->collectBias(*outGradMat, 1.0f); } } void Conv3DLayer::addBias() { MatrixPtr outMat = getOutputValue(); - if (this->sharedBiases_) { - outMat->addSharedBias(*(biases_->getW()), 1.0f); + outMat->addSharedBias(*(biases_->getW()), 1.0f); } else { - outMat->addBias(*(biases_->getW()), 1.0f); + outMat->addBias(*(biases_->getW()), 1.0f); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 8de40b681d..286f5b985c 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -12,43 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
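
bpropWeights and bpropData above are the two transposed GEMMs of the same formulation: the weight gradient accumulates dY * colBuf^T, and the column gradient is W^T * dY, which col2Vol then scatters onto the input gradient. A shape-level sketch with tiny matrices, not the Paddle code:

// Illustrative sketch (not from the patch): the two backward GEMMs of a conv layer.
// W is M x K, colBuf is K x N, dY is M x N, all row-major.
#include <cassert>
#include <vector>

static void matmul(const std::vector<double>& A, bool transA,
                   const std::vector<double>& B, bool transB,
                   std::vector<double>& C, int m, int n, int k) {
  // C (m x n) += op(A) * op(B), where op(A) is m x k and op(B) is k x n
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j)
      for (int p = 0; p < k; ++p) {
        double a = transA ? A[p * m + i] : A[i * k + p];
        double b = transB ? B[j * k + p] : B[p * n + j];
        C[i * n + j] += a * b;
      }
}

int main() {
  const int M = 2, K = 3, N = 4;
  std::vector<double> W(M * K, 0.5), colBuf(K * N, 1.0), dY(M * N, 2.0);
  std::vector<double> dW(M * K, 0.0), dColBuf(K * N, 0.0);
  matmul(dY, false, colBuf, true, dW, M, K, N);  // dW += dY * colBuf^T
  matmul(W, true, dY, false, dColBuf, K, N, M);  // dColBuf = W^T * dY
  assert(dW[0] == 8.0);       // N terms of 2.0 * 1.0
  assert(dColBuf[0] == 2.0);  // M terms of 0.5 * 2.0
  return 0;
}
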
*/ +#include "DeConv3DLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" -#include "DeConv3DLayer.h" namespace paddle { REGISTER_LAYER(deconv3d, DeConv3DLayer); #define DECONV_OUTPUT_SIZE(IN_SIZE, STRID, PAD, KSIZE) \ - (((IN_SIZE) - 1) * (STRID) - 2 * (PAD) + (KSIZE)) + (((IN_SIZE)-1) * (STRID)-2 * (PAD) + (KSIZE)) bool DeConv3DLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { + const ParameterMap ¶meterMap) { if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; // for Deconv, the dimension of Kernel is // channel * output * depth * height * weigth // Matrix storage format: (output * depth * height * weigth) x channel for (int index = 0; index < config_.inputs().size(); ++index) { M_.push_back(filterChannels_[index]); - K_.push_back( - filterPixels_[index] * (numFilters_/groups_[index])); - weights_[index]->getW()->reshape( - filterPixels_[index] * numFilters_, - filterChannels_[index]); - weights_[index]->getWGrad()->reshape( - filterPixels_[index] * numFilters_, - filterChannels_[index]); + K_.push_back(filterPixels_[index] * (numFilters_ / groups_[index])); + if (weights_[index]->getW()) + weights_[index]->getW()->reshape(filterPixels_[index] * numFilters_, + filterChannels_[index]); + if (weights_[index]->getWGrad()) + weights_[index]->getWGrad()->reshape(filterPixels_[index] * numFilters_, + filterChannels_[index]); } - biases_->getWGrad()->reshape( - biases_->getWGrad()->width_, biases_->getWGrad()->height_); - biases_->getW()->reshape( - biases_->getW()->width_, biases_->getW()->height_); + if (biases_->getWGrad()) + biases_->getWGrad()->reshape(biases_->getWGrad()->width_, + biases_->getWGrad()->height_); + if (biases_->getW()) + biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } - size_t DeConv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); // imgSizeH_.clear(); @@ -64,18 +63,12 @@ size_t DeConv3DLayer::getSize() { // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeW_[i], stride_[i], - padding_[i], filterSize_[i])); - outputH_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeH_[i], strideY_[i], - paddingY_[i], filterSizeY_[i])); - outputD_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeD_[i], strideZ_[i], - paddingZ_[i], filterSizeZ_[i])); + outputW_.push_back(DECONV_OUTPUT_SIZE( + imgSizeW_[i], stride_[i], padding_[i], filterSize_[i])); + outputH_.push_back(DECONV_OUTPUT_SIZE( + imgSizeH_[i], strideY_[i], paddingY_[i], filterSizeY_[i])); + outputD_.push_back(DECONV_OUTPUT_SIZE( + imgSizeD_[i], strideZ_[i], paddingZ_[i], filterSizeZ_[i])); No_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); @@ -96,32 +89,37 @@ void DeConv3DLayer::forward(PassType passType) { for (size_t i = 0; i != inputLayers_.size(); ++i) { REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); + const MatrixPtr &inMat = getInputValue(i); int M = M_[i]; int N = N_[i]; int K = K_[i]; MatrixPtr wMat = weights_[i]->getW(); - Matrix::resizeOrCreate(colBuf_, K * groups_[i] , N, false, useGpu_); - + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, 
useGpu_); for (int n = 0; n < batchSize; ++n) { - real *inData = inMat->getData() + n * width; - real *colBufData = colBuf_->getData(); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr wMatSub = wMat->subMatrix(g * K, K); - MatrixPtr inMatSub = - Matrix::create(inData, M, N, false, useGpu_); - MatrixPtr colBufDataSub = - Matrix::create(colBufData, K, N, false, useGpu_); - colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); - colBufData += K * N; - inData += M * N; + real *inData = inMat->getData() + n * inMat->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); + MatrixPtr wMatSub = wMat->subMatrix(g * K, K); + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); + inData += M * N; } - colBuf_->col2Vol(outMat->getData()+ n * outMat->getWidth(), - numFilters_, outputD_[i], outputH_[i], outputW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i], 1.0, 1.0); + colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(), + numFilters_, + outputD_[i], + outputH_[i], + outputW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i], + 1.0, + 1.0); } } if (nullptr != this->biasParameter_) { @@ -134,63 +132,69 @@ void DeConv3DLayer::forward(PassType passType) { void DeConv3DLayer::backward(const UpdateCallback &callback) { backwardActivation(); int batchSize = getOutputGrad()->getHeight(); - int outputWidth = getOutputGrad()->getWidth(); if (biases_ && biases_->getWGrad()) { bpropBiases(); biases_->getParameterPtr()->incUpdate(callback); } - for (size_t i =0; i < inputLayers_.size(); ++i) { - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - const MatrixPtr& inMat = getInputValue(i); - for (int n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < inputLayers_.size(); ++i) { + if (weights_[i]->getWGrad() || this->needGradient_) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); - if (weights_[i]->getWGrad() || this->needGradient_) { - colBuf_->vol2Col(getOutputGrad()->getData() + n * outputWidth, - numFilters_, outputD_[i], outputH_[i], outputW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - } - if (weights_[i]->getWGrad()) { - real *inData = inMat->getData() + n * inMat->getWidth();; - real *wGradData = weights_[i]->getWGrad()->getData(); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); - MatrixPtr inMatSub = Matrix::create( - inData, M, N, false, useGpu_); - MatrixPtr wGradMatSub = Matrix::create( - wGradData, K, M, false, useGpu_); - wGradMatSub->mul(*colBufDataSub, - *(inMatSub->getTranspose()), 1.0, 1.0); - wGradData += K * M; - inData += M * N; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + const MatrixPtr &inMat = getInputValue(i); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col( + getOutputGrad()->getData() + n * getOutputGrad()->getStride(), + numFilters_, + outputD_[i], + outputH_[i], + outputW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + if (weights_[i]->getWGrad()) { 
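
DeConv3DLayer reverses the data flow: the per-group GEMM produces the column buffer from the input, and col2Vol scatters it into the larger output volume. A standalone check of the shapes implied by init() and forward() above, assuming groups = 1 and the constants from test3DDeConvLayer earlier in this series:

// Illustrative sketch (not from the patch): the shapes DeConv3DLayer sets up.
#include <cassert>

int deconvOut(int in, int stride, int pad, int k) {  // DECONV_OUTPUT_SIZE above
  return (in - 1) * stride - 2 * pad + k;
}

int main() {
  const int numFilters = 6, channels = 3, groups = 1;
  const int kD = 3, kH = 3, kW = 3, filterPixels = kD * kH * kW;
  const int inX = 4, inY = 6, inZ = 6;  // img_size, img_size_y, img_size_z in the test

  int M = channels / groups;                    // filterChannels
  int K = filterPixels * (numFilters / groups);
  int N = inX * inY * inZ;                      // deconv iterates over *input* voxels
  assert(M == 3 && K == 162 && N == 144);

  // weight storage is (filterPixels * numFilters) x filterChannels == (K * groups) x M,
  // so each group's colBuf[K x N] = W_sub[K x M] * in[M x N] is well formed
  assert(filterPixels * numFilters == K * groups);

  // col2Vol then scatters the columns into the 9 x 13 x 13 x numFilters output
  assert(deconvOut(inX, 2, 0, kW) == 9);
  assert(deconvOut(inY, 2, 0, kH) == 13);
  return 0;
}
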
+ real *inData = inMat->getData() + n * inMat->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + MatrixPtr wGradMatSub = + weights_[i]->getWGrad()->subMatrix(g * K, K); + MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); + wGradMatSub->mul( + *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0); + inData += M * N; + } } - weights_[i]->getParameterPtr()->incUpdate(callback); - } - if (this->needGradient_) { - real* preGrad = getInputGrad(i)->getData(); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); - MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); - MatrixPtr inGradMatSub = Matrix::create( - preGrad, M, N, false, useGpu_); - inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 0.0); - preGrad += M * N; + if (getInputGrad(i)) { + real *preGrad = + getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); + MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); + MatrixPtr inGradMatSub = + Matrix::create(preGrad, M, N, false, useGpu_); + inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0); + preGrad += M * N; + } } } REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); } } } - -void DeConv3DLayer::bpropWeights(int i) { } -void DeConv3DLayer::bpropData(int i) { } +void DeConv3DLayer::bpropWeights(int i) {} +void DeConv3DLayer::bpropData(int i) {} void DeConv3DLayer::bpropBiases() { - MatrixPtr outGradMat = getOutputGrad(); + const MatrixPtr &outGradMat = getOutputGrad(); if (this->sharedBiases_) { biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); From 43f6cdc8247042244f9b75bac51957c962a16ffd Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:13:25 +0800 Subject: [PATCH 034/170] fix Matrix --- paddle/math/Matrix.cpp | 110 +++++++++++++++++++------------- paddle/math/Matrix.h | 140 ++++++++++++++++++++++++++--------------- 2 files changed, 153 insertions(+), 97 deletions(-) diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 66868e73b3..579a0f3cf3 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1389,51 +1389,71 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { output_d, grad_d, mat_d, height_, width_); } -void GpuMatrix::vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW) { - hl_matrix_vol2Col(data, - channels, depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, getData()); -} - -void GpuMatrix::col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta) { - hl_matrix_col2Vol(trg, - channels, depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, +void GpuMatrix::vol2Col(real* dataSrc, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + hl_matrix_vol2Col(dataSrc, + channels, + depth, + height, + 
width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + getData()); +} + +void GpuMatrix::col2Vol(real* dataDst, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + hl_matrix_col2Vol(dataDst, + channels, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, getData(), - alpha, beta); - } + alpha, + beta); +} /** * CpuMatrix @@ -4082,7 +4102,7 @@ void CpuMatrix::col2Vol(real* trg, real alpha, real beta) { real* src = getData(); - int outDepth = (depth + 2 * paddingH - filterD) / strideD + 1; + int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1; int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; int outWidth = (width + 2 * paddingW - filterW) / strideW + 1; int channelsCol = channels * filterD * filterH * filterW; diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 4354996ce0..cc3a56f279 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -1040,40 +1040,40 @@ public: } virtual void vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW) { - LOG(FATAL) << "Not implemeted"; - } + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW) { + LOG(FATAL) << "Not implemeted"; + } - virtual void col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta) { - LOG(FATAL) << "Not implemeted"; - } + virtual void col2Vol(real* trg, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta) { + LOG(FATAL) << "Not implemeted"; + } virtual void bilinearForward(const Matrix& in, const size_t inImgH, @@ -1411,18 +1411,36 @@ public: const real ratioW); void vol2Col(real* data, - int channels, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW); + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW); void col2Vol(real* trg, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real alpha, real beta); + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta); void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); @@ -1767,17 +1785,35 @@ public: void vol2Col(real* data, int channels, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int 
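
The one-character change above (paddingH to paddingD in the outDepth expression of CpuMatrix::col2Vol) only shows up when the three paddings differ. A minimal standalone illustration, with numbers chosen for the sketch:

// Illustrative sketch (not from the patch): asymmetric padding exposes the old bug.
#include <cassert>

int outLen(int in, int k, int pad, int stride) { return (in + 2 * pad - k) / stride + 1; }

int main() {
  const int depth = 8, filterD = 3, strideD = 1;
  const int paddingD = 2, paddingH = 0;
  assert(outLen(depth, filterD, paddingD, strideD) == 10);  // value after the fix
  assert(outLen(depth, filterD, paddingH, strideD) == 6);   // what the old code computed
  return 0;
}
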
paddingD, int paddingH, int paddingW); + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW); void col2Vol(real* trg, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real alpha, real beta); + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real alpha, + real beta); template void operator=(const ExpressionType& expr) { From 0a7516d193061ccb35ab410fc947bd245a936159 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:14:27 +0800 Subject: [PATCH 035/170] fix col2vol vol2col kernel --- paddle/cuda/src/hl_cuda_matrix.cu | 192 ++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 63 deletions(-) diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index f626c07a0c..3bf1b0251f 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -593,21 +593,28 @@ void hl_matrix_rotate( CHECK_SYNC("hl_matrix_rotate failed"); } - -__global__ void keMatrixVol2Col( - int num_kernels, real*dataSrc, real* dataDst, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - int depth_col, int height_col, int width_col){ - - for (int index = blockIdx.x * blockDim.x + threadIdx.x; - index < num_kernels; - index += blockDim.x * gridDim.x){ - +__global__ void keMatrixVol2Col(int num_kernels, + real* dataSrc, + real* dataDst, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + int depth_col, + int height_col, + int width_col) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; + index += blockDim.x * gridDim.x) { int w_out = index % width_col; - int h_out = (index / width_col ) % height_col; + int h_out = (index / width_col) % height_col; int d_out = (index / width_col / height_col) % depth_col; int channel_in = index / width_col / height_col / depth_col; int channel_out = channel_in * filterD * filterH * filterW; @@ -615,7 +622,9 @@ __global__ void keMatrixVol2Col( int h_in = h_out * strideH - paddingH; int d_in = d_out * strideD - paddingD; - dataDst += ((channel_out * depth_col + d_out) * height_col + h_out) * width_col + w_out; + dataDst += + ((channel_out * depth_col + d_out) * height_col + h_out) * width_col + + w_out; dataSrc += ((channel_in * depth + d_in) * height + h_in) * width + w_in; for (int k = 0; k < filterD; ++k) { for (int i = 0; i < filterH; ++i) { @@ -623,8 +632,10 @@ __global__ void keMatrixVol2Col( int d = d_in + k; int h = h_in + i; int w = w_in + j; - *dataDst = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && w < width ) ? - dataSrc[(k * height + i) * width + j] : 0; + *dataDst = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && + w < width) + ? 
dataSrc[(k * height + i) * width + j] + : 0; dataDst += depth_col * height_col * width_col; } } @@ -633,11 +644,20 @@ __global__ void keMatrixVol2Col( } void hl_matrix_vol2Col(real* dataSrc, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, real* dataDst){ - + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real* dataDst) { int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; int height_col = (height + 2 * paddingH - filterH) / strideH + 1; int width_col = (width + 2 * paddingW - filterW) / strideW + 1; @@ -646,34 +666,55 @@ void hl_matrix_vol2Col(real* dataSrc, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixVol2Col<<< blocks, threads >>>( - num_kernels, dataSrc, dataDst, - depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, - depth_col, height_col, width_col); + keMatrixVol2Col<<>>(num_kernels, + dataSrc, + dataDst, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col); CHECK_SYNC("hl_matrix_vol2Col failed"); } -__global__ void keMatrixCol2Vol( - int num_kernels, real*dataDst, real* dataSrc, - int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - int depth_col, int height_col, int width_col, - real alpha, real beta){ - - for (int index = blockIdx.x * blockDim.x + threadIdx.x; - index < num_kernels; +__global__ void keMatrixCol2Vol(int num_kernels, + real* dataDst, + real* dataSrc, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + int depth_col, + int height_col, + int width_col, + real alpha, + real beta) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; index += blockDim.x * gridDim.x) { - - real val = 0; + real srcVal = 0; + real dstVal = dataDst[index]; int w = index % width + paddingW; int h = (index / width) % height + paddingH; int d = (index / width / height) % depth + paddingD; - int c = index / (width * height * depth); + int c = index / width / height / depth; // compute the start and end of the output int w_col_start = (w < filterW) ? 0 : (w - filterW) / strideW + 1; int w_col_end = min(w / strideW + 1, width_col); @@ -682,32 +723,45 @@ __global__ void keMatrixCol2Vol( int d_col_start = (d < filterD) ? 
0 : (d - filterD) / strideD + 1; int d_col_end = min(d / strideD + 1, depth_col); - int offset = (c * filterD * filterW * filterH + \ - d * filterW * filterH + h * filterW + w) * depth_col * height_col * width_col; + int offset = (c * filterD * filterW * filterH + d * filterW * filterH + + h * filterW + w) * + depth_col * height_col * width_col; - int coeff_d_col = (1 - strideD * filterW * filterH * depth_col) * height_col * width_col; - int coeff_h_col = (1 - strideH * filterW * depth_col * height_col) * width_col; + int coeff_d_col = + (1 - strideD * filterW * filterH * depth_col) * height_col * width_col; + int coeff_h_col = + (1 - strideH * filterW * depth_col * height_col) * width_col; int coeff_w_col = (1 - strideW * depth_col * height_col * width_col); for (int d_col = d_col_start; d_col < d_col_end; ++d_col) { for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - val += dataSrc[offset + d_col * coeff_d_col + h_col * coeff_h_col + w_col * coeff_w_col]; + srcVal += dataSrc[offset + d_col * coeff_d_col + h_col * coeff_h_col + + w_col * coeff_w_col]; } } } - dataDst[index] = val; + dataDst[index] = alpha * srcVal + beta * dstVal; } } void hl_matrix_col2Vol(real* dataDst, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, real* dataSrc, - real alpha, real beta){ - + real alpha, + real beta) { int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; int height_col = (height + 2 * paddingH - filterH) / strideH + 1; int width_col = (width + 2 * paddingW - filterW) / strideW + 1; @@ -716,14 +770,26 @@ void hl_matrix_col2Vol(real* dataDst, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixCol2Vol<<< blocks, threads >>>( - num_kernels, dataDst, dataSrc, - depth, height, width, - filterD, filterH, filterW, - strideD, strideH, strideW, - paddingD, paddingH, paddingW, - depth_col, height_col, width_col, - alpha, beta); + keMatrixCol2Vol<<>>(num_kernels, + dataDst, + dataSrc, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col, + alpha, + beta); CHECK_SYNC("hl_matrix_col2Vol failed"); } From 62e6dac402ca63b402b5dfd1d7649cba1e258d41 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 18 Aug 2017 14:30:09 +0800 Subject: [PATCH 036/170] add MKLDNNMatrix files --- paddle/gserver/layers/MKLDNNLayer.h | 1 + paddle/math/CMakeLists.txt | 15 ++++++++++ paddle/math/MKLDNNMatrix.cpp | 19 ++++++++++++ paddle/math/MKLDNNMatrix.h | 45 +++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+) create mode 100644 paddle/math/MKLDNNMatrix.cpp create mode 100644 paddle/math/MKLDNNMatrix.h diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 63e29f447e..9533027fa6 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -18,6 +18,7 @@ limitations under the License. 
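
The rewritten keMatrixCol2Vol blends the gathered sum into the destination as alpha * src + beta * dst, matching the alpha/beta parameters of Matrix::col2Vol. A trivial standalone illustration of that blending, assuming alpha = beta = 1.0 as the 3-D layers in this series pass it:

// Illustrative sketch (not from the patch): per-element blending in col2Vol.
#include <cassert>

double col2VolElement(double gathered, double dst, double alpha, double beta) {
  return alpha * gathered + beta * dst;
}

int main() {
  assert(col2VolElement(3.0, 2.0, 1.0, 1.0) == 5.0);  // accumulate into existing data
  assert(col2VolElement(3.0, 2.0, 1.0, 0.0) == 3.0);  // overwrite existing data
  return 0;
}
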
*/ #include "Layer.h" #include "MKLDNNBase.h" #include "mkldnn.hpp" +#include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn_wgt); diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index bf28092e82..ad6de18c81 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -14,6 +14,21 @@ # file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) + +message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}") +message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}") +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") + message(STATUS "----------DNN_HEADER:${DNN_HEADER}") + message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}") + list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) + list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with MKLDNNMatrix") +else() + message(STATUS "Compile with MKLDNNMatrix") +endif() + set(MATH_SOURCES "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp new file mode 100644 index 0000000000..df8e72d78b --- /dev/null +++ b/paddle/math/MKLDNNMatrix.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNMatrix.h" + +using namespace mkldnn; // NOLINT + +namespace paddle {} // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h new file mode 100644 index 0000000000..91ef56f2c3 --- /dev/null +++ b/paddle/math/MKLDNNMatrix.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +//#include "Matrix.h" +#include "Vector.h" + +#include "mkldnn.hpp" +#include "paddle/parameter/Parameter.h" + +namespace paddle { + +static const std::map PARAM_FOARMAT_MAP = + {{mkldnn::memory::format::oi, PARAM_FORMAT_MKLDNN_OI}}; + +class MKLDNNMatrix; +typedef std::shared_ptr MKLDNNMatrixPtr; + +/** + * @brief MKLDNN Matrix. 
+ * + */ +class MKLDNNMatrix : public CpuVector { +public: + explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {} + + ~MKLDNNMatrix() {} + +protected: + int fmt_; +}; + +} // namespace paddle From 38cc5dadcc5c76c4aa50f5e92b560f4ccaba9227 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 16:43:59 +0800 Subject: [PATCH 037/170] modified bias shape of ConvLayer --- paddle/gserver/layers/Conv3DLayer.cpp | 5 ----- paddle/gserver/layers/ConvBaseLayer.cpp | 17 ++++++++--------- paddle/gserver/layers/DeConv3DLayer.cpp | 5 ----- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 5609a4cc73..106909824d 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -37,11 +37,6 @@ bool Conv3DLayer::init(const LayerMap &layerMap, weights_[index]->getWGrad()->getHeight()); ++index; } - if (nullptr != biases_->getWGrad()) - biases_->getWGrad()->reshape(biases_->getWGrad()->width_, - biases_->getWGrad()->height_); - if (nullptr != biases_->getW()) - biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index e437b0b86e..6bcbe0ddb2 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -21,11 +21,10 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - isDeconv_ = (config_.type() == "exconv" || - config_.type() == "cudnn_conv" || - config_.type() == "conv3d" || - config_.type() == "deconv3d" ) - ? false : true; + isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv" || + config_.type() == "conv3d" || config_.type() == "deconv3d") + ? 
false + : true; /* Initialize the convolutional layer parameter */ numFilters_ = config_.num_filters(); @@ -52,8 +51,8 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, filterSizeZ_.push_back(conf.filter_size_z()); imgSizeD_.push_back(conf.img_size_z()); outputD_.push_back(conf.output_z()); - filterPixels_.push_back( - filterSize_.back() * filterSizeY_.back() * filterSizeZ_.back()); + filterPixels_.push_back(filterSize_.back() * filterSizeY_.back() * + filterSizeZ_.back()); } CHECK(inputLayers_.size() == parameters_.size()); @@ -73,10 +72,10 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, if (sharedBiases_) { CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); biases_ = - std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); + std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); } else { biases_ = - std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); + std::unique_ptr(new Weight(1, getSize(), biasParameter_)); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 286f5b985c..5a54a68447 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -39,11 +39,6 @@ bool DeConv3DLayer::init(const LayerMap &layerMap, weights_[index]->getWGrad()->reshape(filterPixels_[index] * numFilters_, filterChannels_[index]); } - if (biases_->getWGrad()) - biases_->getWGrad()->reshape(biases_->getWGrad()->width_, - biases_->getWGrad()->height_); - if (biases_->getW()) - biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } From 3065cb26258e1a7a014c6e367747214615832c3a Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 18 Aug 2017 17:43:06 +0800 Subject: [PATCH 038/170] add huber_regression_cost --- doc/api/v2/config/layer.rst | 5 ++ paddle/gserver/layers/CostLayer.cpp | 55 +++++++++++++++++++ paddle/gserver/layers/CostLayer.h | 24 ++++++++ paddle/gserver/tests/test_LayerGrad.cpp | 20 ++++++- proto/ModelConfig.proto | 3 + python/paddle/trainer/config_parser.py | 11 ++++ .../paddle/trainer_config_helpers/layers.py | 53 ++++++++++++++++++ .../protostr/test_cost_layers.protostr | 17 ++++++ .../tests/configs/test_cost_layers.py | 2 + python/paddle/v2/tests/test_layer.py | 5 +- 10 files changed, 192 insertions(+), 3 deletions(-) diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 22a6b2ab84..9a5901616f 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -409,6 +409,11 @@ multi_binary_label_cross_entropy_cost .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost :noindex: +huber_regression_cost +------------------------- +.. autoclass:: paddle.v2.layer.huber_regression_cost + :noindex: + huber_classification_cost ------------------------- .. autoclass:: paddle.v2.layer.huber_classification_cost diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 69cf393225..91a742422e 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -594,6 +594,61 @@ void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) { } } +// +// Huber loss for robust regression. 
+// +REGISTER_LAYER(huber_regression, HuberRegressionLoss); + +bool HuberRegressionLoss::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + HuberCost::init(layerMap, parameterMap); + delta_ = config_.delta(); + return true; +} + +void HuberRegressionLoss::forwardImp(Matrix& output, + Argument& label, + Matrix& target) { + HuberCost::forwardImp(output, label, target); + size_t numSamples = target.getHeight(); + CHECK(label.value); + CHECK_EQ((*label.value).getHeight(), numSamples); + CHECK_EQ(output.getHeight(), numSamples); + CHECK_EQ(output.getWidth(), (*label.value).getWidth()); + CHECK_EQ(target.getWidth(), (size_t)1); + + real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); + real* lbl = + useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); + std::vector cost(numSamples); + for (size_t i = 0; i < numSamples; ++i) { + real a = std::abs(lbl[i] - out[i]); + if (a <= delta_) + cost[i] = a * a / 2; + else + cost[i] = delta_ * (a - delta_ / 2); + } + target.copyFrom(cost.data(), numSamples); +} + +void HuberRegressionLoss::backwardImp(Matrix& output, + Argument& label, + Matrix& outputG) { + size_t numSamples = output.getHeight(); + real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); + real* lbl = + useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); + real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); + for (size_t i = 0; i < numSamples; ++i) { + real a = lbl[i] - out[i]; + if (std::abs(a) <= delta_) + grad[i] += -a; + else + grad[i] += a > 0 ? delta_ : -delta_; + } + if (useGpu_) outputG.copyFrom(grad, numSamples); +} + // // Huber loss for robust 2-classes classification // diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index c006dc8110..0ce72ef40a 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -321,6 +321,30 @@ public: void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} }; +/** + * Huber loss for robust regression. + * + * Given output f(x), label y and delta, the loss is: + * Loss = 0.5 * (1 - y * f)^2, if abs(y - f) <= delta \\ + * Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise + */ +class HuberRegressionLoss : public HuberCost { +public: + explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; + + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; + +protected: + real delta_; +}; + /** * Huber loss for robust 2-classes classification. 
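
HuberRegressionLoss::forwardImp above implements the piecewise loss 0.5 * a^2 for a = |y - f(x)| <= delta and delta * (a - delta / 2) otherwise. A standalone numeric check at a few residuals, with delta = 1 chosen for the sketch:

// Illustrative sketch (not from the patch): the piecewise Huber regression loss.
#include <cassert>
#include <cmath>

double huber(double label, double out, double delta) {
  double a = std::fabs(label - out);
  return a <= delta ? 0.5 * a * a : delta * (a - 0.5 * delta);
}

int main() {
  const double delta = 1.0;
  assert(huber(1.0, 0.5, delta) == 0.125);  // quadratic region
  assert(huber(1.0, 0.0, delta) == 0.5);    // boundary, both branches agree
  assert(huber(4.0, 1.0, delta) == 2.5);    // linear region, grows only linearly
  // the two branches meet at |y - f| == delta, so the loss stays smooth near zero
  // residual while growing only linearly for outliers
  return 0;
}
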
* diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 6d60250f6d..c522b20f0e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -828,6 +828,24 @@ TEST(Layer, square_error_weighted) { } } +TEST(Layer, huber_regression_loss) { + TestConfig config; + config.layerConfig.set_type("huber_regression"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + for (auto delta : {1, 3, 5}) { + config.layerConfig.set_delta(delta); + testLayerGrad(config, "huber_regression", 100, /* trans */ false, useGpu); + } + } +} + TEST(Layer, huber_two_class) { TestConfig config; config.layerConfig.set_type("huber_classification"); @@ -839,7 +857,7 @@ TEST(Layer, huber_two_class) { config.layerConfig.add_inputs(); for (auto useGpu : {false, true}) { - testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); + testLayerGrad(config, "huber_two_class", 100, /* trans */ false, useGpu); } } diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..e19e0f85f3 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -496,6 +496,9 @@ message LayerConfig { optional int32 axis = 54 [ default = 2 ]; repeated uint32 offset = 55; repeated uint32 shape = 56; + + // for HuberRegressionLoss + optional double delta = 57 [ default = 1.0 ]; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 248da9417f..a3ca3f2510 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2317,6 +2317,17 @@ class LambdaCost(LayerBase): self.config.max_sort_size = max_sort_size +@config_layer('huber_regression') +class HuberRegressionLoss(LayerBase): + def __init__(self, name, inputs, delta=1., coeff=1., device=None): + super(HuberRegressionLoss, self).__init__( + name, 'huber_regression', 1, inputs=inputs, device=device) + config_assert( + len(self.inputs) == 2, 'HuberRegression must have 2 inputs') + self.config.delta = delta + self.config.coeff = coeff + + @config_layer('nce') class NCELayer(LayerBase): def __init__(self, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 20d96efe15..d61c94dc82 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -108,6 +108,7 @@ __all__ = [ 'sum_cost', 'rank_cost', 'lambda_cost', + 'huber_regression_cost', 'huber_classification_cost', 'block_expand_layer', 'maxout_layer', @@ -216,6 +217,7 @@ class LayerType(object): RANK_COST = 'rank-cost' LAMBDA_COST = 'lambda_cost' + HUBER_REGRESSION = 'huber_regression' HUBER_CLASSIFICATION = 'huber_classification' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' @@ -5603,6 +5605,57 @@ def sum_cost(input, name=None, layer_attr=None): return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1) +@wrap_name_default() +@layer_support() +def huber_regression_cost(input, + label, + name=None, + delta=1.0, + coeff=1.0, + layer_attr=None): + """ + In statistics, the Huber loss is a loss function used in robust regression, + that is less sensitive to outliers in data than the squared error loss. 
+ Given a prediction f(x), a label y and :math:`\delta`, the loss function + is defined as: + + .. math: + loss = 0.5*\left ( y-f(x) \right )^2, \left | y-f(x) \right |\leq \delta + loss = \delta \left | y-f(x) \right |-0.5\delta ^2, otherwise + + The example usage is: + + .. code-block:: python + + cost = huber_regression_cost(input=input_layer, label=label_layer) + + :param input: The first input layer. + :type input: LayerOutput. + :param label: The input label. + :type input: LayerOutput. + :param name: The name of this layers. It is not necessary. + :type name: None|basestring. + :param delta: The difference between the observed and predicted values. + :type delta: float. + :param coeff: The coefficient affects the gradient in the backward. + :type coeff: float. + :param layer_attr: Extra Layer Attribute. + :type layer_attr: ExtraLayerAttribute + :return: LayerOutput object. + :rtype: LayerOutput. + """ + assert isinstance(input, LayerOutput) + Layer( + name=name, + type=LayerType.HUBER_REGRESSION, + inputs=[input.name, label.name], + delta=delta, + coeff=coeff, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, LayerType.HUBER_REGRESSION, parents=[input, label], size=1) + + @wrap_name_default() @layer_support() def huber_classification_cost(input, diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index a64e5ea0dd..55ab464ddf 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -167,6 +167,20 @@ layers { softmax_selfnorm_alpha: 0.1 coeff: 1.0 } +layers { + name: "__huber_regression_cost_0__" + type: "huber_regression" + size: 1 + active_type: "" + inputs { + input_layer_name: "input" + } + inputs { + input_layer_name: "labels" + } + coeff: 1.0 + delta: 1.0 +} layers { name: "huber_probs" type: "data" @@ -300,6 +314,7 @@ output_layer_names: "__rank_cost_0__" output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" +output_layer_names: "__huber_regression_cost_0__" output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" @@ -324,6 +339,7 @@ sub_models { layer_names: "__lambda_cost_0__" layer_names: "__cross_entropy_0__" layer_names: "__cross_entropy_with_selfnorm_0__" + layer_names: "__huber_regression_cost_0__" layer_names: "huber_probs" layer_names: "huber_label" layer_names: "__huber_classification_cost_0__" @@ -349,6 +365,7 @@ sub_models { output_layer_names: "__lambda_cost_0__" output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" + output_layer_names: "__huber_regression_cost_0__" output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__sum_cost_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py index 98bf026d60..7ce375c708 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py @@ -33,6 +33,8 @@ outputs( input=probs, label=xe_label), cross_entropy_with_selfnorm( input=probs, 
label=xe_label), + huber_regression_cost( + input=seq_in, label=labels), huber_classification_cost( input=data_layer( name='huber_probs', size=1), diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 7373a55ce6..783a0ca85d 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -141,12 +141,13 @@ class CostLayerTest(unittest.TestCase): cost8 = layer.rank_cost(left=score, right=score, label=score) cost9 = layer.lambda_cost(input=inference, score=score) cost10 = layer.sum_cost(input=inference) - cost11 = layer.huber_classification_cost(input=score, label=label) + cost11 = layer.huber_regression_cost(input=score, label=label) + cost12 = layer.huber_classification_cost(input=score, label=label) print layer.parse_network([cost1, cost2]) print layer.parse_network([cost3, cost4]) print layer.parse_network([cost5, cost6]) - print layer.parse_network([cost7, cost8, cost9, cost10, cost11]) + print layer.parse_network([cost7, cost8, cost9, cost10, cost11, cost12]) crf = layer.crf(input=inference, label=label) crf_decoding = layer.crf_decoding(input=inference, size=3) From 59b3df31aa3f960753bf0d0d922319124e04301e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sun, 20 Aug 2017 14:52:23 +0800 Subject: [PATCH 039/170] Extract OpInfo into a library Fix cycle dependencies, Fix #3583. --- paddle/framework/CMakeLists.txt | 4 +-- paddle/framework/backward_test.cc | 4 +-- paddle/framework/grad_op_builder.cc | 20 +++++++------- paddle/framework/op_info.cc | 30 +++++++++++++++++++++ paddle/framework/op_info.h | 42 +++++++++++++++++++++++++++++ paddle/framework/op_registry.cc | 37 +++++++++++++------------ paddle/framework/op_registry.h | 35 ++++++------------------ paddle/framework/operator.cc | 8 +++--- paddle/framework/operator.h | 27 ++++++++++--------- paddle/framework/operator_test.cc | 9 ++++--- paddle/framework/pybind.cc | 2 +- paddle/operators/net_op.cc | 5 ++-- paddle/operators/net_op.h | 6 +++-- paddle/operators/recurrent_op.cc | 8 +++--- paddle/operators/recurrent_op.h | 10 ++++--- 15 files changed, 152 insertions(+), 95 deletions(-) create mode 100644 paddle/framework/op_info.cc create mode 100644 paddle/framework/op_info.h diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68304c9fc8..59012ea8c1 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -18,8 +18,8 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) proto_library(framework_proto SRCS framework.proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto) - -cc_library(operator SRCS operator.cc DEPS framework_proto device_context tensor scope attribute) +cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) +cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 2c5ec76dfe..bcdfae132c 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -72,8 +72,8 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { class FcOp : public operators::NetOp { public: - FcOp(const std::string &type, const VarNameMap &inputs, - const VarNameMap &outputs, const AttributeMap &attrs) + FcOp(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs) : 
NetOp(type, inputs, outputs, attrs) { AddOp(OpRegistry::CreateOp("mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}}, diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 0a2a41f6b6..fcc5d7a216 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -20,11 +20,11 @@ namespace framework { enum class OpArgType { IN, OUT }; static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, - bool is_grad, OperatorBase::VarNameMap* vars) { + bool is_grad, VariableNameMap* vars) { const auto& src_inout = src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs(); auto& dst_inout = *vars; - const OpProto* proto = OpRegistry::op_info_map().at(src_op->Type()).proto_; + const OpProto* proto = OpInfoMap().at(src_op->Type()).proto_; const auto& src_arg_list = src_type == OpArgType::IN ? proto->inputs() : proto->outputs(); for (const auto& arg : src_arg_list) { @@ -40,25 +40,25 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, } OperatorBase* BuildGradOp(const OperatorBase* op) { - auto it = OpRegistry::op_info_map().find(op->Type()); - PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(), - "'%s' has not been registered.", op->Type()); + auto it = OpInfoMap().find(op->Type()); + PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", + op->Type()); PADDLE_ENFORCE(it->second.proto_ != nullptr, "'%s' has no OpProto.", op->Type()); std::string grad_op_type = it->second.grad_op_type_; PADDLE_ENFORCE(!grad_op_type.empty(), "'%s' has no gradient operator.", op->Type()); - OperatorBase::VarNameMap inputs; - OperatorBase::VarNameMap outputs; + VariableNameMap inputs; + VariableNameMap outputs; TransOpArg(op, OpArgType::IN, false, &inputs); // I TransOpArg(op, OpArgType::OUT, false, &inputs); // O TransOpArg(op, OpArgType::OUT, true, &inputs); // OG TransOpArg(op, OpArgType::IN, true, &outputs); // IG - it = OpRegistry::op_info_map().find(grad_op_type); - PADDLE_ENFORCE(it != OpRegistry::op_info_map().end(), - "'%s' has not been registered.", grad_op_type); + it = OpInfoMap().find(grad_op_type); + PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", + grad_op_type); return it->second.creator_(grad_op_type, inputs, outputs, op->Attrs()); } diff --git a/paddle/framework/op_info.cc b/paddle/framework/op_info.cc new file mode 100644 index 0000000000..f928ac6473 --- /dev/null +++ b/paddle/framework/op_info.cc @@ -0,0 +1,30 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/op_info.h" + +namespace paddle { +namespace framework { + +static std::unordered_map* + g_op_info_map = nullptr; +std::unordered_map& OpInfoMap() { + if (g_op_info_map == nullptr) { + g_op_info_map = + new std::unordered_map(); + } + return *g_op_info_map; +} +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h new file mode 100644 index 0000000000..fdd0ed77d4 --- /dev/null +++ b/paddle/framework/op_info.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include +#include + +#include "paddle/framework/attribute.h" + +namespace paddle { +namespace framework { +class OperatorBase; +using VariableNameMap = std::map>; + +using OpCreator = std::function; + +struct OpInfo { + OpCreator creator_; + std::string grad_op_type_; + OpProto* proto_; + OpAttrChecker* checker_; +}; + +extern std::unordered_map& OpInfoMap(); + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index 8eae86e960..e03dc3a73d 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -19,32 +19,20 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -std::unique_ptr OpRegistry::CreateOp(const std::string& type, - const VarNameMap& inputs, - const VarNameMap& outputs, - AttributeMap attrs) { - auto it = op_info_map().find(type); - PADDLE_ENFORCE(it != op_info_map().end(), +std::unique_ptr OpRegistry::CreateOp( + const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, AttributeMap attrs) { + auto it = OpInfoMap().find(type); + PADDLE_ENFORCE(it != OpInfoMap().end(), "Operator '%s' has not been registered.", type); it->second.checker_->Check(attrs); auto op = it->second.creator_(type, inputs, outputs, attrs); return std::unique_ptr(op); } -std::unique_ptr OpRegistry::CreateOp(const OpDesc& op_desc) { - VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); - VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); - AttributeMap attrs; - for (auto& attr : op_desc.attrs()) { - attrs[attr.name()] = GetAttrValue(attr); - } - - return CreateOp(op_desc.type(), inputs, outputs, attrs); -} - -OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap( +static VariableNameMap ConvertOpDescVarsToVarNameMap( const google::protobuf::RepeatedPtrField& op_desc_vars) { - VarNameMap ret_val; + VariableNameMap ret_val; for (auto& var : op_desc_vars) { auto& var_names = ret_val[var.parameter()]; auto& var_names_in_proto = var.arguments(); @@ -55,6 +43,17 @@ OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap( return ret_val; } +std::unique_ptr OpRegistry::CreateOp(const OpDesc& op_desc) { + VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); + VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); + AttributeMap attrs; + for (auto& attr : op_desc.attrs()) { + attrs[attr.name()] = GetAttrValue(attr); + } + + return CreateOp(op_desc.type(), inputs, outputs, attrs); +} + std::unique_ptr OpRegistry::CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); return std::unique_ptr(BuildGradOp(&op)); diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 4c2d13d639..06530bc7d0 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -23,6 +23,7 @@ limitations under the License. 
*/ #include "paddle/framework/attribute.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/grad_op_builder.h" +#include "paddle/framework/op_info.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" @@ -30,28 +31,16 @@ namespace paddle { namespace framework { class OpRegistry { - using VarNameMap = OperatorBase::VarNameMap; - using OpCreator = std::function; - public: - struct OpInfo { - OpCreator creator_; - std::string grad_op_type_; - OpProto* proto_; - OpAttrChecker* checker_; - }; - template static void RegisterOp(const std::string& op_type, const std::string& grad_op_type) { - PADDLE_ENFORCE(op_info_map().count(op_type) == 0, + PADDLE_ENFORCE(OpInfoMap().count(op_type) == 0, "'%s' is registered more than once.", op_type); OpInfo op_info; - op_info.creator_ = [](const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, - const AttributeMap& attrs) { + op_info.creator_ = []( + const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) { return new OpType(type, inputs, outputs, attrs); }; op_info.grad_op_type_ = grad_op_type; @@ -70,7 +59,7 @@ class OpRegistry { op_info.proto_ = nullptr; op_info.checker_ = nullptr; } - op_info_map().insert(std::make_pair(op_type, op_info)); + OpInfoMap().insert(std::make_pair(op_type, op_info)); // register gradient op if (!grad_op_type.empty()) { RegisterOp(grad_op_type, ""); @@ -78,21 +67,13 @@ class OpRegistry { } static std::unique_ptr CreateOp(const std::string& type, - const VarNameMap& inputs, - const VarNameMap& outputs, + const VariableNameMap& inputs, + const VariableNameMap& outputs, AttributeMap attrs); static std::unique_ptr CreateOp(const OpDesc& op_desc); - static VarNameMap ConvertOpDescVarsToVarNameMap( - const google::protobuf::RepeatedPtrField& op_desc_vars); - static std::unique_ptr CreateGradOp(const OperatorBase& op); - - static std::unordered_map& op_info_map() { - static std::unordered_map op_info_map_; - return op_info_map_; - } }; class Registrar { diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index eadd8f3316..48a7fe64ac 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -115,8 +115,8 @@ void OperatorBase::Rename(const std::string& old_name, } OperatorBase::OperatorBase(const std::string& type, - const OperatorBase::VarNameMap& inputs, - const OperatorBase::VarNameMap& outputs, + const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) { static std::atomic gUniqId(0UL); @@ -141,9 +141,9 @@ std::vector OperatorBase::OutputVars(bool has_intermediate) const { } return ret_val; } - auto it = OpRegistry::op_info_map().find(type_); + auto it = OpInfoMap().find(type_); PADDLE_ENFORCE( - it != OpRegistry::op_info_map().end(), + it != OpInfoMap().end(), "Operator %s not registered, cannot figure out intermediate outputs", type_); PADDLE_ENFORCE( diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 8072980889..83dab8631d 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include +#include "op_info.h" #include "paddle/framework/attribute.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/scope.h" @@ -62,10 +63,8 @@ class ExecutionContext; */ class OperatorBase { public: - using VarNameMap = std::map>; - - OperatorBase(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const AttributeMap& attrs); + OperatorBase(const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs); virtual ~OperatorBase() {} @@ -93,8 +92,8 @@ class OperatorBase { /// rename inputs outputs name void Rename(const std::string& old_name, const std::string& new_name); - const VarNameMap& Inputs() const { return inputs_; } - const VarNameMap& Outputs() const { return outputs_; } + const VariableNameMap& Inputs() const { return inputs_; } + const VariableNameMap& Outputs() const { return outputs_; } //! Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; //! Get a input which has multiple variables. @@ -122,11 +121,11 @@ class OperatorBase { // I (Inputs)opear // O (Outputs) // OG (Output Gradients) - VarNameMap inputs_; + VariableNameMap inputs_; // NOTE: in case of OpGrad, outputs_ contains // IG (Inputs Gradients) - VarNameMap outputs_; + VariableNameMap outputs_; AttributeMap attrs_; }; @@ -142,9 +141,11 @@ class OperatorBase { // You can also use // using PARENT_CLASS::PARENT_CLASS; // to use parent's constructor. -#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \ - CLS(const std::string& type, const VarNameMap& inputs, \ - const VarNameMap& outputs, const paddle::framework::AttributeMap& attrs) \ +#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \ + CLS(const std::string& type, \ + const ::paddle::framework::VariableNameMap& inputs, \ + const ::paddle::framework::VariableNameMap& outputs, \ + const paddle::framework::AttributeMap& attrs) \ : PARENT_CLS(type, inputs, outputs, attrs) {} class NOP : public OperatorBase { @@ -389,8 +390,8 @@ class OperatorWithKernel : public OperatorBase { using OpKernelMap = std::unordered_map, OpKernelHash>; - OperatorWithKernel(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const AttributeMap& attrs) + OperatorWithKernel(const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} void InferShape(const Scope& scope) const override { diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 2425b87779..1d7efb7b94 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -23,8 +23,8 @@ static int op_run_num = 0; class OpWithoutKernelTest : public OperatorBase { public: - OpWithoutKernelTest(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const AttributeMap& attrs) + OpWithoutKernelTest(const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs), x(1) {} void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, @@ -249,8 +249,9 @@ TEST(OpKernel, multi_inputs) { class OperatorClone : public paddle::framework::OperatorBase { public: DEFINE_OP_CLONE_METHOD(OperatorClone); - OperatorClone(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, + OperatorClone(const std::string& type, + const 
paddle::framework::VariableNameMap& inputs, + const paddle::framework::VariableNameMap& outputs, const paddle::framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} void InferShape(const paddle::framework::Scope& scope) const override {} diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index f0114b9e49..1aec483573 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -138,7 +138,7 @@ All parameter, weight, gradient are variables in Paddle. //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. m.def("get_all_op_protos", []() -> std::vector { - auto &op_info_map = OpRegistry::op_info_map(); + auto &op_info_map = OpInfoMap(); std::vector ret_values; for (auto it = op_info_map.begin(); it != op_info_map.end(); ++it) { const OpProto *proto = it->second.proto_; diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index a7d7105110..9bfa712d98 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -81,9 +81,8 @@ std::vector NetOp::OutputVars(bool has_intermediate) const { return ret_val; } -NetOp::NetOp(const std::string& type, - const framework::OperatorBase::VarNameMap& inputs, - const framework::OperatorBase::VarNameMap& outputs, +NetOp::NetOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : framework::OperatorBase(type, inputs, outputs, attrs) {} diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 885ac6eeca..05b475d88f 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -38,8 +38,10 @@ class NetOp : public framework::OperatorBase { public: static const char kAll[]; NetOp() : framework::OperatorBase("plain_net", {}, {}, {}) {} - NetOp(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const framework::AttributeMap& attrs); + + NetOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs); NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) { this->ops_.reserve(o.ops_.size()); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 78ce0ba3c0..16bd249cb3 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -131,8 +131,8 @@ const rnn::ArgumentName RecurrentGradientOp::kArgName{ "memories", "pre_memories", "boot_memories@grad"}; RecurrentOp::RecurrentOp(const std::string& type, - const framework::OperatorBase::VarNameMap& inputs, - const framework::OperatorBase::VarNameMap& outputs, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); @@ -223,8 +223,8 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { } RecurrentGradientOp::RecurrentGradientOp( - const std::string& type, const framework::OperatorBase::VarNameMap& inputs, - const framework::OperatorBase::VarNameMap& outputs, + const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); diff --git a/paddle/operators/recurrent_op.h 
b/paddle/operators/recurrent_op.h index bcfa817de8..1033d657a3 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -114,8 +114,9 @@ class RecurrentGradientAlgorithm { class RecurrentOp : public framework::OperatorBase { public: - RecurrentOp(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, const framework::AttributeMap& attrs); + RecurrentOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs); RecurrentOp(const RecurrentOp& o) : framework::OperatorBase( @@ -150,8 +151,9 @@ class RecurrentOp : public framework::OperatorBase { class RecurrentGradientOp : public framework::OperatorBase { public: - RecurrentGradientOp(const std::string& type, const VarNameMap& inputs, - const VarNameMap& outputs, + RecurrentGradientOp(const std::string& type, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs); RecurrentGradientOp(const RecurrentGradientOp& o) From 7f6b5044b640edcbd77bbb368509569776f7e0ee Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sun, 20 Aug 2017 15:28:00 +0800 Subject: [PATCH 040/170] Make OpInfoMap as a class * Add Get/Has methods to OpInfoMap * Add PADDLE_ENFORCE for OpInfo to get field. --- paddle/framework/grad_op_builder.cc | 20 +++------- paddle/framework/op_info.cc | 9 ++--- paddle/framework/op_info.h | 61 ++++++++++++++++++++++++++++- paddle/framework/op_registry.cc | 8 ++-- paddle/framework/op_registry.h | 4 +- paddle/framework/operator.cc | 12 +----- paddle/framework/pybind.cc | 17 ++++---- 7 files changed, 84 insertions(+), 47 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index fcc5d7a216..b02a599a80 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -24,9 +24,9 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, const auto& src_inout = src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs(); auto& dst_inout = *vars; - const OpProto* proto = OpInfoMap().at(src_op->Type()).proto_; + auto& proto = OpInfoMap::Instance().Get(src_op->Type()).Proto(); const auto& src_arg_list = - src_type == OpArgType::IN ? proto->inputs() : proto->outputs(); + src_type == OpArgType::IN ? 
proto.inputs() : proto.outputs(); for (const auto& arg : src_arg_list) { if (arg.not_in_gradient() && !is_grad) continue; const std::string src_name = arg.name(); @@ -40,14 +40,8 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type, } OperatorBase* BuildGradOp(const OperatorBase* op) { - auto it = OpInfoMap().find(op->Type()); - PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", - op->Type()); - PADDLE_ENFORCE(it->second.proto_ != nullptr, "'%s' has no OpProto.", - op->Type()); - std::string grad_op_type = it->second.grad_op_type_; - PADDLE_ENFORCE(!grad_op_type.empty(), "'%s' has no gradient operator.", - op->Type()); + auto& info = OpInfoMap::Instance().Get(op->Type()); + PADDLE_ENFORCE(info.HasGradientOp()); VariableNameMap inputs; VariableNameMap outputs; @@ -56,10 +50,8 @@ OperatorBase* BuildGradOp(const OperatorBase* op) { TransOpArg(op, OpArgType::OUT, true, &inputs); // OG TransOpArg(op, OpArgType::IN, true, &outputs); // IG - it = OpInfoMap().find(grad_op_type); - PADDLE_ENFORCE(it != OpInfoMap().end(), "'%s' has not been registered.", - grad_op_type); - return it->second.creator_(grad_op_type, inputs, outputs, op->Attrs()); + auto& grad_info = OpInfoMap::Instance().Get(info.grad_op_type_); + return grad_info.Creator()(info.grad_op_type_, inputs, outputs, op->Attrs()); } } // namespace framework diff --git a/paddle/framework/op_info.cc b/paddle/framework/op_info.cc index f928ac6473..81ba29797c 100644 --- a/paddle/framework/op_info.cc +++ b/paddle/framework/op_info.cc @@ -17,12 +17,11 @@ namespace paddle { namespace framework { -static std::unordered_map* - g_op_info_map = nullptr; -std::unordered_map& OpInfoMap() { +static OpInfoMap* g_op_info_map = nullptr; + +OpInfoMap& OpInfoMap::Instance() { if (g_op_info_map == nullptr) { - g_op_info_map = - new std::unordered_map(); + g_op_info_map = new OpInfoMap(); } return *g_op_info_map; } diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h index fdd0ed77d4..94245c6c44 100644 --- a/paddle/framework/op_info.h +++ b/paddle/framework/op_info.h @@ -34,9 +34,68 @@ struct OpInfo { std::string grad_op_type_; OpProto* proto_; OpAttrChecker* checker_; + + bool HasOpProtoAndChecker() const { + return proto_ != nullptr && checker_ != nullptr; + } + + const OpProto& Proto() const { + PADDLE_ENFORCE_NOT_NULL(proto_, "Operator Proto has not been registered"); + PADDLE_ENFORCE(proto_->IsInitialized(), + "Operator Proto must be initialized in op info"); + return *proto_; + } + + const OpAttrChecker& Checker() const { + PADDLE_ENFORCE_NOT_NULL(checker_, + "Operator Checker has not been registered"); + return *checker_; + } + + const OpCreator& Creator() const { + PADDLE_ENFORCE_NOT_NULL(creator_, + "Operator Creator has not been registered"); + return creator_; + } + + bool HasGradientOp() const { return !grad_op_type_.empty(); } }; -extern std::unordered_map& OpInfoMap(); +class OpInfoMap { + public: + static OpInfoMap& Instance(); + + OpInfoMap(const OpInfoMap& o) = delete; + OpInfoMap(OpInfoMap&& o) = delete; + OpInfoMap& operator=(const OpInfoMap& o) = delete; + OpInfoMap& operator=(OpInfoMap&& o) = delete; + + bool Has(const std::string& op_type) const { + return map_.find(op_type) != map_.end(); + } + + void Insert(const std::string& type, const OpInfo& info) { + PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type); + map_.insert({type, info}); + } + + const OpInfo& Get(const std::string& type) const { + auto it = map_.find(type); + PADDLE_ENFORCE(it != map_.end(), 
"Operator %s are not found", type); + return it->second; + } + + template + void IterAllInfo(Callback callback) { + for (auto& it : map_) { + callback(it.first, it.second); + } + } + + private: + OpInfoMap() = default; + std::unordered_map map_; +}; } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index e03dc3a73d..b0e85dd49f 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -22,11 +22,9 @@ namespace framework { std::unique_ptr OpRegistry::CreateOp( const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, AttributeMap attrs) { - auto it = OpInfoMap().find(type); - PADDLE_ENFORCE(it != OpInfoMap().end(), - "Operator '%s' has not been registered.", type); - it->second.checker_->Check(attrs); - auto op = it->second.creator_(type, inputs, outputs, attrs); + auto& info = OpInfoMap::Instance().Get(type); + info.Checker().Check(attrs); + auto op = info.Creator()(type, inputs, outputs, attrs); return std::unique_ptr(op); } diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 06530bc7d0..2d09cde41e 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -35,7 +35,7 @@ class OpRegistry { template static void RegisterOp(const std::string& op_type, const std::string& grad_op_type) { - PADDLE_ENFORCE(OpInfoMap().count(op_type) == 0, + PADDLE_ENFORCE(!OpInfoMap::Instance().Has(op_type), "'%s' is registered more than once.", op_type); OpInfo op_info; op_info.creator_ = []( @@ -59,7 +59,7 @@ class OpRegistry { op_info.proto_ = nullptr; op_info.checker_ = nullptr; } - OpInfoMap().insert(std::make_pair(op_type, op_info)); + OpInfoMap::Instance().Insert(op_type, op_info); // register gradient op if (!grad_op_type.empty()) { RegisterOp(grad_op_type, ""); diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 48a7fe64ac..7abbde610f 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -141,18 +141,10 @@ std::vector OperatorBase::OutputVars(bool has_intermediate) const { } return ret_val; } - auto it = OpInfoMap().find(type_); - PADDLE_ENFORCE( - it != OpInfoMap().end(), - "Operator %s not registered, cannot figure out intermediate outputs", - type_); - PADDLE_ENFORCE( - it->second.proto_ != nullptr, - "Operator %s has no OpProto, cannot figure out intermediate outputs", - type_); + auto& info = OpInfoMap::Instance().Get(Type()); // get all OpProto::Var for outputs - for (auto& o : it->second.proto_->outputs()) { + for (auto& o : info.Proto().outputs()) { // ignore all intermediate output if (o.intermediate()) continue; auto out = outputs_.find(o.name()); diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 1aec483573..6212c84909 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -138,19 +138,16 @@ All parameter, weight, gradient are variables in Paddle. //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. 
m.def("get_all_op_protos", []() -> std::vector { - auto &op_info_map = OpInfoMap(); std::vector ret_values; - for (auto it = op_info_map.begin(); it != op_info_map.end(); ++it) { - const OpProto *proto = it->second.proto_; - if (proto == nullptr) { - continue; - } - PADDLE_ENFORCE(proto->IsInitialized(), "OpProto must all be initialized"); + + OpInfoMap::Instance().IterAllInfo([&ret_values](const std::string &type, + const OpInfo &info) { + if (!info.HasOpProtoAndChecker()) return; std::string str; - PADDLE_ENFORCE(proto->SerializeToString(&str), + PADDLE_ENFORCE(info.Proto().SerializeToString(&str), "Serialize OpProto Error. This could be a bug of Paddle."); - ret_values.push_back(py::bytes(str)); - } + ret_values.emplace_back(str); + }); return ret_values; }); m.def_submodule( From c108d6108cbdd28424397341fb67be01a2f63413 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 21 Aug 2017 14:03:12 +0800 Subject: [PATCH 041/170] Identity operator and its gradient --- paddle/framework/CMakeLists.txt | 3 +- paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 1 + paddle/operators/identity_op.cc | 71 +++++++++++++++++++ paddle/operators/identity_op.cu | 17 +++++ paddle/operators/identity_op.h | 32 +++++++++ .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_identity_op.py | 24 +++++++ 8 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/identity_op.cc create mode 100644 paddle/operators/identity_op.cu create mode 100644 paddle/operators/identity_op.h create mode 100644 python/paddle/v2/framework/tests/test_identity_op.py diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68304c9fc8..f249512f47 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -55,5 +55,6 @@ cc_library(paddle_pybind SHARED recurrent_op uniform_random_op gaussian_random_op - fill_zeros_like_op) + fill_zeros_like_op + identity_op) endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index f0114b9e49..ddb244623f 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,6 +42,7 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); +USE_OP(identity); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a7c89787e4..20e562c7d3 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -68,3 +68,4 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(identity_op SRCS identity_op.cc identity_op.cu DEPS net_op) diff --git a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc new file mode 100644 index 0000000000..cac44020bc --- /dev/null +++ b/paddle/operators/identity_op.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/identity_op.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +class IdentityOp : public framework::OperatorWithKernel { + public: + IdentityOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto *in = ctx.Input("X"); + auto *out = ctx.Output("Out"); + out->Resize(in->dims()); + } +}; + +class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { + public: + IdentityOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of identity operator.").NotInGradient(); + AddOutput("Out", "The output tensor of identity operator.").NotInGradient(); + AddComment(R"DOC(Identity operator + +The equation is: Out = X +)DOC"); + } +}; + +// Identity Op's gradient is identity op, too. +// Grad(Out=identity_op(X)) => Grad(X) = identity_op(Grad(Out)) +class IdentityGradOp : public NetOp { + public: + IdentityGradOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, + const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(framework::OpRegistry::CreateOp( + "identity", {{"X", {Input(framework::GradVarName("Out"))}}}, + {{"Out", {Output(framework::GradVarName("X"))}}}, {})); + CompleteAddOp(false); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(identity, ops::IdentityOp, ops::IdentityOpMaker, identity_grad, + ops::IdentityGradOp); +REGISTER_OP_CPU_KERNEL(identity, ops::IdentityKernel); diff --git a/paddle/operators/identity_op.cu b/paddle/operators/identity_op.cu new file mode 100644 index 0000000000..3053104bbe --- /dev/null +++ b/paddle/operators/identity_op.cu @@ -0,0 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/identity_op.h" + +REGISTER_OP_GPU_KERNEL(identity, paddle::operators::IdentityKernel); diff --git a/paddle/operators/identity_op.h b/paddle/operators/identity_op.h new file mode 100644 index 0000000000..14a832257b --- /dev/null +++ b/paddle/operators/identity_op.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" +namespace paddle { +namespace operators { +template +class IdentityKernel : public framework::OpKernel { + public: + virtual void Compute(const framework::ExecutionContext& context) const { + auto* tensor = context.Output("Out"); + auto* in = context.Input("X"); + tensor->CopyFrom(*in, in->place()); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ce57a07130..cf7baa5556 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,3 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) +py_test(test_identity_op SRCS test_identity_op.py) diff --git a/python/paddle/v2/framework/tests/test_identity_op.py b/python/paddle/v2/framework/tests/test_identity_op.py new file mode 100644 index 0000000000..181d9c0c21 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_identity_op.py @@ -0,0 +1,24 @@ +import unittest +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op +import numpy as np + + +class IdentityTest(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "identity" + self.inputs = {'X': np.random.random((32, 784)).astype("float32")} + self.outputs = {'Out': self.inputs['X']} + + +class IdentityGradOpTest(GradientChecker): + def test_normal(self): + op = create_op("identity") + inputs = {"X": np.random.random((10, 10)).astype("float32")} + self.check_grad(op, inputs, set("X"), "Out") + + +if __name__ == '__main__': + unittest.main() From d5768ebc89868431040e47e3db126263da385d70 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 20:49:35 +0800 Subject: [PATCH 042/170] fix above comments --- paddle/cuda/include/hl_matrix.h | 58 ++++++++----- paddle/cuda/include/stub/hl_matrix_stub.h | 47 +++++++---- paddle/cuda/src/hl_cuda_matrix.cu | 84 +++++++++---------- paddle/gserver/layers/Conv3DLayer.cpp | 26 ++++-- paddle/gserver/layers/Conv3DLayer.h | 14 +--- paddle/gserver/layers/ConvBaseLayer.cpp | 26 +----- paddle/gserver/layers/ConvBaseLayer.h | 1 - paddle/gserver/layers/CudnnConvBaseLayer.cpp | 18 ++++ paddle/gserver/layers/DeConv3DLayer.cpp | 46 +++++----- paddle/gserver/layers/DeConv3DLayer.h | 44 +++++----- paddle/gserver/layers/ExpandConvBaseLayer.cpp | 21 ++++- paddle/gserver/tests/test_LayerGrad.cpp | 31 +++---- paddle/math/tests/test_matrixCompare.cpp | 28 ++----- proto/ModelConfig.proto | 4 +- 14 files changed, 247 insertions(+), 201 deletions(-) diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index da2ed8cabb..a37921b749 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -240,16 +240,25 @@ extern void hl_matrix_rotate( * @param[in] strideW stride in the width. * @param[in] paddingD padding in the depth. * @param[in] paddingH padding in the height. - * @param[in] paddingW padding in the width. + * @param[in] paddingW padding in the width. * @param[out] matDst output matrix. 
- * + * */ -extern void hl_matrix_vol2Col(real* matSrc, - int channel, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* matDst); +extern void hl_matrix_vol2Col(const real* dataSrc, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real* dataDst); /** * @brief Matrix col2Vol: Convert col matrix into 3D volume @@ -267,19 +276,28 @@ extern void hl_matrix_vol2Col(real* matSrc, * @param[in] strideW stride in the width. * @param[in] paddingD padding in the depth. * @param[in] paddingH padding in the height. - * @param[in] paddingW padding in the width. + * @param[in] paddingW padding in the width. * @param[in] matSrc input matrix. - * @param[in] beta input - * @param[in] alpha input - * + * @param[in] beta input + * @param[in] alpha input + * */ -extern void hl_matrix_col2Vol(real* matDst, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* matSrc, - real alpha, real beta); - +extern void hl_matrix_col2Vol(real* dataDst, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + const real* dataSrc, + real alpha, + real beta); #endif /* HL_MATRIX_H_ */ diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h index 0b73777812..6ac332945c 100644 --- a/paddle/cuda/include/stub/hl_matrix_stub.h +++ b/paddle/cuda/include/stub/hl_matrix_stub.h @@ -99,19 +99,38 @@ inline void hl_matrix_collect_shared_bias(real* B_d, inline void hl_matrix_rotate( real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} -inline void hl_matrix_vol2Col(real* data, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* data_col) {} - -inline void hl_matrix_col2Vol(real* data, - int channels, int depth, int height, int width, - int filterD, int filterH, int filterW, - int strideD, int strideH, int strideW, - int paddingD, int paddingH, int paddingW, - real* data_Im, - real alpha, real beta) {} +inline void hl_matrix_vol2Col(const real* dataSrc, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + real* dataDst) {} + +inline void hl_matrix_col2Vol(real* dataDst, + int channels, + int depth, + int height, + int width, + int filterD, + int filterH, + int filterW, + int strideD, + int strideH, + int strideW, + int paddingD, + int paddingH, + int paddingW, + const real* dataSrc, + real alpha, + real beta) {} #endif // HL_MATRIX_STUB_H_ diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index 3bf1b0251f..b41a3a1e06 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -594,7 +594,7 @@ void hl_matrix_rotate( } __global__ void keMatrixVol2Col(int num_kernels, - real* dataSrc, + const real* dataSrc, real* dataDst, int depth, int height, @@ -643,7 +643,7 @@ __global__ void keMatrixVol2Col(int num_kernels, } } -void 
hl_matrix_vol2Col(real* dataSrc, +void hl_matrix_vol2Col(const real* dataSrc, int channels, int depth, int height, @@ -666,30 +666,30 @@ void hl_matrix_vol2Col(real* dataSrc, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixVol2Col<<>>(num_kernels, - dataSrc, - dataDst, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - depth_col, - height_col, - width_col); + keMatrixVol2Col<<>>(num_kernels, + dataSrc, + dataDst, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col); CHECK_SYNC("hl_matrix_vol2Col failed"); } __global__ void keMatrixCol2Vol(int num_kernels, real* dataDst, - real* dataSrc, + const real* dataSrc, int depth, int height, int width, @@ -759,7 +759,7 @@ void hl_matrix_col2Vol(real* dataDst, int paddingD, int paddingH, int paddingW, - real* dataSrc, + const real* dataSrc, real alpha, real beta) { int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; @@ -770,26 +770,26 @@ void hl_matrix_col2Vol(real* dataDst, const int threads = 512; const int blocks = DIVUP(num_kernels, threads); - keMatrixCol2Vol<<>>(num_kernels, - dataDst, - dataSrc, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - depth_col, - height_col, - width_col, - alpha, - beta); + keMatrixCol2Vol<<>>(num_kernels, + dataDst, + dataSrc, + depth, + height, + width, + filterD, + filterH, + filterW, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + depth_col, + height_col, + width_col, + alpha, + beta); CHECK_SYNC("hl_matrix_col2Vol failed"); } diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 106909824d..db907bbab1 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -28,16 +28,26 @@ bool Conv3DLayer::init(const LayerMap &layerMap, const ConvConfig &conf = inputConfig.conv_conf(); M_.push_back(numFilters_ / conf.groups()); K_.push_back(filterPixels_[index] * filterChannels_[index]); - if (nullptr != weights_[index]->getW()) - weights_[index]->getW()->reshape(weights_[index]->getW()->getWidth(), - weights_[index]->getW()->getHeight()); - if (nullptr != weights_[index]->getWGrad()) - weights_[index]->getWGrad()->reshape( - weights_[index]->getWGrad()->getWidth(), - weights_[index]->getWGrad()->getHeight()); + + // create a new weight + size_t height, width; + width = filterPixels_[index] * filterChannels_[index]; + height = numFilters_; + CHECK_EQ(parameters_[index]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[index]); + weights_.emplace_back(w); ++index; } - CHECK(inputLayers_.size() == parameters_.size()); + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(1, getSize(), biasParameter_)); + } + } return true; } diff --git a/paddle/gserver/layers/Conv3DLayer.h b/paddle/gserver/layers/Conv3DLayer.h index 703671e5d0..b622508d0c 100644 --- a/paddle/gserver/layers/Conv3DLayer.h +++ b/paddle/gserver/layers/Conv3DLayer.h @@ -12,13 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ - #pragma once - +#include #include "ConvBaseLayer.h" -#include "paddle/math/Matrix.h" #include "paddle/math/MathUtils.h" -#include +#include "paddle/math/Matrix.h" namespace paddle { @@ -30,21 +28,17 @@ namespace paddle { class Conv3DLayer : public ConvBaseLayer { public: explicit Conv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - ~Conv3DLayer() {} - bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); - - size_t getSize(); + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); void forward(PassType passType); void addBias(); - void backward(const UpdateCallback& callback); - void bpropBiases(); void bpropData(int i); void bpropWeights(int i); + size_t getSize(); protected: // Figure out the dimensions for individual gemms. diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 6bcbe0ddb2..8c637eaec9 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -21,8 +21,7 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); - isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv" || - config_.type() == "conv3d" || config_.type() == "deconv3d") + isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv") ? false : true; @@ -56,28 +55,9 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, } CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - size_t height, width; - height = filterPixels_[i] * filterChannels_[i]; - width = (!isDeconv_) ? numFilters_ : channels_[i]; - - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight* w = new Weight(height, width, parameters_[i]); - weights_.emplace_back(w); - } - /* initialize the biases_ */ - if (biasParameter_.get()) { - if (sharedBiases_) { - CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); - biases_ = - std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); - } else { - biases_ = - std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - } + // create new weights_ in derived class + // create new biases_ in derived class // default caffe model caffeMode_ = true; diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 8d1fd989e8..629c462776 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -23,7 +23,6 @@ namespace paddle { * with learned filters and (optionally) adds biases. */ - class ConvBaseLayer : public Layer { protected: typedef std::vector IntV; diff --git a/paddle/gserver/layers/CudnnConvBaseLayer.cpp b/paddle/gserver/layers/CudnnConvBaseLayer.cpp index c056bbe4d1..9e954615cd 100644 --- a/paddle/gserver/layers/CudnnConvBaseLayer.cpp +++ b/paddle/gserver/layers/CudnnConvBaseLayer.cpp @@ -46,8 +46,26 @@ bool CudnnConvBaseLayer::init(const LayerMap &layerMap, projConf_.emplace_back(conf); projections_.emplace_back( Projection::create(*projConf_[i], parameters_[i], useGpu_)); + + // create a new weight + size_t height, width; + height = filterPixels_[i] * filterChannels_[i]; + width = (!isDeconv_) ? 
numFilters_ : channels_[i]; + CHECK_EQ(parameters_[i]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[i]); + weights_.emplace_back(w); } + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); + } + } if (biases_.get() && sharedBiases_) { hl_create_tensor_descriptor(&biasDesc_); hl_create_tensor_descriptor(&outputDesc_); diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 5a54a68447..b18c06e36c 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -20,9 +20,6 @@ namespace paddle { REGISTER_LAYER(deconv3d, DeConv3DLayer); -#define DECONV_OUTPUT_SIZE(IN_SIZE, STRID, PAD, KSIZE) \ - (((IN_SIZE)-1) * (STRID)-2 * (PAD) + (KSIZE)) - bool DeConv3DLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; @@ -32,14 +29,25 @@ bool DeConv3DLayer::init(const LayerMap &layerMap, for (int index = 0; index < config_.inputs().size(); ++index) { M_.push_back(filterChannels_[index]); K_.push_back(filterPixels_[index] * (numFilters_ / groups_[index])); - if (weights_[index]->getW()) - weights_[index]->getW()->reshape(filterPixels_[index] * numFilters_, - filterChannels_[index]); - if (weights_[index]->getWGrad()) - weights_[index]->getWGrad()->reshape(filterPixels_[index] * numFilters_, - filterChannels_[index]); + + // create a new weight + size_t height, width; + height = filterPixels_[index] * numFilters_; + width = filterChannels_[index]; + CHECK_EQ(parameters_[index]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[index]); + weights_.emplace_back(w); + } + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(1, numFilters_, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(1, getSize(), biasParameter_)); + } } - CHECK(inputLayers_.size() == parameters_.size()); return true; } @@ -52,22 +60,22 @@ size_t DeConv3DLayer::getSize() { outputW_.clear(); outputD_.clear(); N_.clear(); - No_.clear(); + NOut_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back(DECONV_OUTPUT_SIZE( - imgSizeW_[i], stride_[i], padding_[i], filterSize_[i])); - outputH_.push_back(DECONV_OUTPUT_SIZE( - imgSizeH_[i], strideY_[i], paddingY_[i], filterSizeY_[i])); - outputD_.push_back(DECONV_OUTPUT_SIZE( - imgSizeD_[i], strideZ_[i], paddingZ_[i], filterSizeZ_[i])); - No_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + outputW_.push_back( + imageSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); + outputH_.push_back(imageSize( + imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); + outputD_.push_back(imageSize( + imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); + NOut_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); - layerSize += No_[i] * 
numFilters_; + layerSize += NOut_[i] * numFilters_; } getOutput().setFrameHeight(outputH_[0]); getOutput().setFrameWidth(outputW_[0]); diff --git a/paddle/gserver/layers/DeConv3DLayer.h b/paddle/gserver/layers/DeConv3DLayer.h index 435807fe5d..a2a3d3f827 100644 --- a/paddle/gserver/layers/DeConv3DLayer.h +++ b/paddle/gserver/layers/DeConv3DLayer.h @@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once +#include #include "ConvBaseLayer.h" -#include "paddle/math/Matrix.h" #include "paddle/math/MathUtils.h" -#include +#include "paddle/math/Matrix.h" namespace paddle { @@ -29,30 +28,25 @@ namespace paddle { */ class DeConv3DLayer : public ConvBaseLayer { public: - explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - - ~DeConv3DLayer() {} - - bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); - - size_t getSize(); - - void forward(PassType passType); - void addBias(); - - void backward(const UpdateCallback& callback); - - void bpropBiases(); - void bpropData(int i); - void bpropWeights(int i); + explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + ~DeConv3DLayer() {} + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + void addBias(); + void backward(const UpdateCallback& callback); + void bpropBiases(); + void bpropData(int i); + void bpropWeights(int i); + size_t getSize(); protected: - // Figure out the dimensions for individual gemms. - IntV M_; /// numFilters_ / filter_group_; - IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ - IntV K_; /// outputD_ * outputH_ * outputW_ - IntV No_; - MatrixPtr colBuf_; + // Figure out the dimensions for individual gemms. + IntV M_; /// numFilters_ / filter_group_; + IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ + IntV K_; /// outputD_ * outputH_ * outputW_ + IntV NOut_; + MatrixPtr colBuf_; }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp index 77736e78f9..2b7bef0a75 100644 --- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp +++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp @@ -22,12 +22,31 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, /* Initialize the basic convolutional parent class */ ConvBaseLayer::init(layerMap, parameterMap); + int index = 0; for (auto &inputConfig : config_.inputs()) { const ConvConfig &conf = inputConfig.conv_conf(); /* Consistent caffe mode for multiple input */ caffeMode_ = conf.caffe_mode(); - } + // create a new weight + size_t height, width; + height = filterPixels_[index] * filterChannels_[index]; + width = (!isDeconv_) ? 
numFilters_ : channels_[index]; + CHECK_EQ(parameters_[index]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[index]); + weights_.emplace_back(w); + index++; + } + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = + std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); + } else { + biases_ = + std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); + } + } getOutputSize(); return true; diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 1e80e2c0ee..d5724293bf 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -2019,7 +2019,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) { const int CHANNELS = 3; const int IMAGE_SIZE = 9; const int IMAGE_SIZE_Y = 9; - const int IMAGE_SIZE_Z = 9; // 2, 3, 5, 5, 5 + const int IMAGE_SIZE_Z = 9; TestConfig config; config.biasSize = NUM_FILTERS; @@ -2084,10 +2084,6 @@ TEST(Layer, test3DConvLayer) { #endif } -int deConvOutputSize(int inSize, int kSize, int pad, int stride) { - return (inSize - 1) * stride - 2 * pad + kSize; -} - void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { // filter size const int NUM_FILTERS = 6; @@ -2126,16 +2122,21 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { conv->set_img_size(IMAGE_SIZE); conv->set_img_size_y(IMAGE_SIZE_Y); conv->set_img_size_z(IMAGE_SIZE_Z); - conv->set_output_x(deConvOutputSize( - conv->img_size(), conv->filter_size(), conv->padding(), conv->stride())); - conv->set_output_y(deConvOutputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y())); - conv->set_output_z(deConvOutputSize(conv->img_size_z(), - conv->filter_size_z(), - conv->padding_z(), - conv->stride_z())); + conv->set_output_x(imageSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + true)); + conv->set_output_y(imageSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + true)); + conv->set_output_z(imageSize(conv->img_size_z(), + conv->filter_size_z(), + conv->padding_z(), + conv->stride_z(), + true)); config.layerConfig.set_size(conv->output_x() * conv->output_y() * conv->output_z() * NUM_FILTERS); conv->set_groups(1); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 1d41ec0870..3abe4484db 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include "TensorCheck.h" +#include "paddle/math/MathUtils.h" #include "paddle/math/Matrix.h" #include "paddle/math/SparseMatrix.h" #include "paddle/testing/TestUtil.h" @@ -1203,19 +1204,6 @@ TEST(Matrix, warpCTC) { } } -int outputSizeCol2Vol( - int imageSize, int filterSize, int padding, int stride, bool caffeMode) { - int outputSize; - if (!caffeMode) { - outputSize = - (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; - } else { - outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; - } - CHECK_GE(outputSize, 1); - return outputSize; -} - void testMatrixCol2Vol(int depth, int height, int width) { int channel = 3; int filterX = 3, filterY = 4, filterZ = 5; @@ -1229,9 +1217,9 @@ void testMatrixCol2Vol(int depth, int height, int width) { cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); - int outD = outputSizeCol2Vol(depth, filterZ, padZ, strideZ, true); - int outH = outputSizeCol2Vol(height, filterY, padZ, strideY, true); - int outW = outputSizeCol2Vol(width, filterX, padZ, strideX, true); + int outD = outputSize(depth, filterZ, padZ, strideZ, true); + int outH = outputSize(height, filterY, padY, strideY, true); + int outW = outputSize(width, filterX, padX, strideX, true); int colBufHeight = channel * filterZ * filterY * filterX; int colBufWidth = outD * outH * outW; @@ -1305,11 +1293,9 @@ void testMatrixCol2Vol(int depth, int height, int width) { } TEST(Matrix, col2Vol) { - for (auto depth : {9, 16, 64, 128}) { - for (auto height : {9, 11, 73, 128, 256}) { - for (auto width : { - 9, 32, 100, 512, - }) { + for (auto depth : {9, 16, 64}) { + for (auto height : {9, 11, 128}) { + for (auto width : {9, 32, 128}) { VLOG(3) << "depth=" << depth << " height=" << height << " width=" << width; testMatrixCol2Vol(depth, height, width); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 043ae502b0..8c6eb5b7e1 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,7 +82,7 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; - + optional uint32 filter_size_z = 15 [ default = 1 ]; optional uint32 padding_z = 16 [ default = 1 ]; optional uint32 stride_z = 17 [ default = 1 ]; @@ -637,4 +637,4 @@ message ModelConfig { // For External Machine, defining how to split a neural network // into multiple parts. 
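The 3-D conv/deconv changes in this patch drop the local DECONV_OUTPUT_SIZE macro, the deConvOutputSize test helper and the outputSizeCol2Vol helper in favour of the shared imageSize()/outputSize() utilities from paddle/math/MathUtils.h. A minimal sketch of the two formulas involved, reconstructed from the removed code (the helper names below are illustrative only, not the MathUtils API):

// Convolution output size in "caffe mode" (from the removed outputSizeCol2Vol):
//   out = (in - k + 2 * pad) / stride + 1
inline int convOutSize(int in, int k, int pad, int stride) {
  return (in - k + 2 * pad) / stride + 1;
}

// Deconvolution output size (from the removed DECONV_OUTPUT_SIZE macro and
// deConvOutputSize helper), the inverse of the mapping above, which is
// presumably why the diffs can swap in imageSize(in, k, pad, stride, true):
//   out = (in - 1) * stride - 2 * pad + k
inline int deconvOutSize(int in, int k, int pad, int stride) {
  return (in - 1) * stride - 2 * pad + k;
}

In caffe mode the two mappings invert each other exactly whenever (in - k + 2 * pad) is a multiple of the stride.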
optional ExternalConfig external_config = 9; -}; \ No newline at end of file +}; From 4bffbd30f0dbc2a2bbff4aa8108867fceecc260a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 21 Aug 2017 16:44:30 +0800 Subject: [PATCH 043/170] use MKLDNNMatrix in fc forward --- paddle/gserver/layers/Layer.cpp | 2 +- paddle/gserver/layers/Layer.h | 20 +++++++- paddle/gserver/layers/MKLDNNFcLayer.cpp | 63 ++++++++++++++++--------- paddle/gserver/layers/MKLDNNLayer.h | 25 +++++++--- paddle/math/CMakeLists.txt | 4 -- paddle/math/MKLDNNMatrix.cpp | 29 +++++++++++- paddle/math/MKLDNNMatrix.h | 43 +++++++++++++---- 7 files changed, 143 insertions(+), 43 deletions(-) diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index d5621412ca..2bc20eee6c 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -41,7 +41,7 @@ namespace paddle { Layer::Layer(const LayerConfig& config, bool useGpu) : config_(config), useGpu_(useGpu), - deviceId_(-1), + deviceId_(CPU_DEVICE), needSequenceInfo_(true) {} bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index 0ed482889d..ec4d093e0c 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -59,7 +59,12 @@ protected: LayerConfig config_; /// whether to use GPU bool useGpu_; - /// Device Id. CPU is -1, and GPU is 0, 1, 2 ... + /// Paddle device ID, MKLDNN is -2, CPU is -1 + enum PADDLE_DEVICE_ID { + MKLDNN_DEVICE = -2, + CPU_DEVICE = -1, + }; + /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ... int deviceId_; /// Input layers std::vector inputLayers_; @@ -321,6 +326,19 @@ public: if (deviceId == getDeviceId()) { return output_; } else { + bool CPU2MKLDNN = + getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE; + bool MKLDNN2CPU = + getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE; + if (CPU2MKLDNN) { + // TODO: do something + return output_; + } else if (MKLDNN2CPU) { + // TODO: do something + return output_; + } + + // TODO: handle mkldnn device or add mkldnn device to other for (size_t i = 0; i < outputOtherDevice_.size(); i++) { if (outputOtherDevice_[i].deviceId == deviceId) { return outputOtherDevice_[i]; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d201fac65e..fac0390eee 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); - real* iData = getInputValue(0)->getData(); - real* oData = getOutputValue()->getData(); - real* wData = weight_->getW()->getData(); - real* bData = hasBias ? biases_->getW()->getData() : NULL; + const MatrixPtr& in = getInputValue(0); + const MatrixPtr& wgt = weight_->getW(); + const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; + const MatrixPtr& out = output_.value; + + if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) { + inVal_ = std::dynamic_pointer_cast(in); + CHECK(inVal_) << "Input should be MKLDNNMatrix"; + // TODO: change input nchw to nc if available + // inVal_->downSpatial() + } else { + inVal_ = MKLDNNMatrix::create( + in, + hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_}, + hasSpatial_ ? format::nchw : format::nc, + engine_); + } - // TODO(TJ): below create should be covered in MkldnnMatrix - // create memory desc - memory::desc iMD = hasSpatial_ ? 
createMD({bs_, ic_, ih_, iw_}, format::nchw) - : createMD({bs_, ic_}, format::nc); - memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) - : createMD({oc_, ic_}, format::oi); - memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) - : createMD({}, format::format_undef); - memory::desc oMD = createMD({bs_, oc_}, format::nc); + wgtVal_ = MKLDNNMatrix::create( + wgt, + hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, + hasSpatial_ ? format::oihw : format::oi, + engine_); - // create memory primitive desc and memory self - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); - wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); - outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + biasVal_ = + hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; + + outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + + // change original output to mkldnn output + output_.value = std::dynamic_pointer_cast(outVal_); + // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) - : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::desc fwdDesc = + hasBias ? fc_fwd::desc(pk, + inVal_->getMD(), + wgtVal_->getMD(), + biasVal_->getMD(), + outVal_->getMD()) + : fc_fwd::desc( + pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (bData != NULL) { - biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); @@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() { // update data inVal_->set_data_handle(iData); } else { - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + LOG(FATAL) << "Should not be empty"; + // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); } // create memory primitive desc and memory self diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 9533027fa6..b44095befb 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -21,7 +21,6 @@ limitations under the License. 
*/ #include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt); namespace paddle { @@ -54,13 +53,14 @@ protected: std::vector pipelineBwd_; // TODO(TJ): change below memory as MKLDNNMatrixPtr type - std::shared_ptr inVal_; + // MKLDNNMatrixPtr ; + MKLDNNMatrixPtr inVal_; std::shared_ptr inGrad_; - std::shared_ptr outVal_; + MKLDNNMatrixPtr outVal_; std::shared_ptr outGrad_; - std::shared_ptr wgtVal_; + MKLDNNMatrixPtr wgtVal_; std::shared_ptr wgtGrad_; - std::shared_ptr biasVal_; + MKLDNNMatrixPtr biasVal_; std::shared_ptr biasGrad_; public: @@ -94,7 +94,7 @@ public: stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); - // TODO(TJ): deivecId + setDeviceID(MKLDNN_DEVICE); return true; } @@ -128,6 +128,19 @@ public: // TODO(TJ): isFmtSuppoted(fmt) return mkldnn::memory::desc(dims, type, fmt); } + + void resetMKLDNNOutput(size_t height, size_t width) { + Layer::resetOutput(height, width); + // get valu and grad, use mkldnn matrix instaed + // output_.value; + } + +protected: + void setDeviceID(int id) { + deviceId_ = id; + output_.deviceId = id; + // TODO: handle mkldnn device or add mkldnn device to other + } }; } // namespace paddle diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index ad6de18c81..8afe6b509d 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,9 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) -message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}") -message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}") if(NOT WITH_MKLDNN) file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - message(STATUS "----------DNN_HEADER:${DNN_HEADER}") - message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}") list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) message(STATUS "Skip compiling with MKLDNNMatrix") diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index df8e72d78b..44fc54278c 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -16,4 +16,31 @@ limitations under the License. */ using namespace mkldnn; // NOLINT -namespace paddle {} // namespace paddle +namespace paddle { + +MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m, + memory::dims dims, + memory::format fmt, + engine& eg, + mkldnn::memory::data_type dtype) { + CpuMatrixPtr cpuM = std::dynamic_pointer_cast(m); + CHECK(cpuM) << "Only support create from CPU matrix yet"; + + size_t ndims = dims.size(); + CHECK(ndims > 0) << "Input dims should not be empty"; + size_t cnt = 1; + for (size_t i = 0; i < ndims; ++i) { + cnt *= dims[i]; + } + CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match"; + + size_t width = m->getWidth(); + size_t height = m->getHeight(); + real* data = m->getData(); + + memory::desc md = memory::desc(dims, dtype, fmt); + memory::primitive_desc pd = memory::primitive_desc(md, eg); + return std::make_shared(data, height, width, pd); +} + +} // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 91ef56f2c3..73eb50d2a0 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -14,9 +14,8 @@ limitations under the License. 
*/ #pragma once -//#include "Matrix.h" -#include "Vector.h" - +#include +#include "Matrix.h" #include "mkldnn.hpp" #include "paddle/parameter/Parameter.h" @@ -32,14 +31,42 @@ typedef std::shared_ptr MKLDNNMatrixPtr; * @brief MKLDNN Matrix. * */ -class MKLDNNMatrix : public CpuVector { +class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { public: - explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {} + MKLDNNMatrix(real* data, + size_t height, + size_t width, + mkldnn::memory::primitive_desc pd) + : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} - ~MKLDNNMatrix() {} + MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd) + : CpuMatrix(height, width, false), mkldnn::memory(pd) { + set_data_handle(CpuMatrix::getData()); + } + + static MKLDNNMatrixPtr create( + const MatrixPtr& m, + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::engine& eg, + mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); + + /** + * Get primitive descriptor + */ + mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } -protected: - int fmt_; + /** + * Get memory descriptor + */ + mkldnn::memory::desc getMD() { return getPD().desc(); } + + /** + * Get format + */ + int getFormat() { return getMD().data.format; } + + ~MKLDNNMatrix() {} }; } // namespace paddle From 73ab2d4678418d42dd5a03d5f8531b49eab2e7ce Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 21 Aug 2017 17:57:06 +0800 Subject: [PATCH 044/170] fix backward error of huber_regression_cost --- paddle/gserver/layers/CostLayer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 91a742422e..7f648070f2 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -644,7 +644,7 @@ void HuberRegressionLoss::backwardImp(Matrix& output, if (std::abs(a) <= delta_) grad[i] += -a; else - grad[i] += a > 0 ? delta_ : -delta_; + grad[i] += a > 0 ? 
-delta_ : delta_; } if (useGpu_) outputG.copyFrom(grad, numSamples); } From d3f219aa9911015bd8c4a1316b85620a07eb9f49 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 21 Aug 2017 18:09:17 +0800 Subject: [PATCH 045/170] Change IdentityOp to ScaleOp --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/pybind.cc | 3 +- paddle/framework/tensor.h | 5 +- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/identity_op.cc | 71 ------------ paddle/operators/net_op.cc | 9 +- paddle/operators/scale_op.cc | 102 ++++++++++++++++++ .../operators/{identity_op.cu => scale_op.cu} | 5 +- .../operators/{identity_op.h => scale_op.h} | 16 ++- .../paddle/v2/framework/tests/CMakeLists.txt | 2 +- .../v2/framework/tests/gradient_checker.py | 7 +- ...ty_op.py => test_scale_and_identity_op.py} | 19 ++++ 12 files changed, 158 insertions(+), 85 deletions(-) delete mode 100644 paddle/operators/identity_op.cc create mode 100644 paddle/operators/scale_op.cc rename paddle/operators/{identity_op.cu => scale_op.cu} (81%) rename paddle/operators/{identity_op.h => scale_op.h} (66%) rename python/paddle/v2/framework/tests/{test_identity_op.py => test_scale_and_identity_op.py} (51%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f249512f47..5df14ae78d 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -56,5 +56,5 @@ cc_library(paddle_pybind SHARED uniform_random_op gaussian_random_op fill_zeros_like_op - identity_op) + scale_op) endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index ddb244623f..3aaf0de150 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,7 +42,8 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); -USE_OP(identity); +USE_OP(scale); +USE_OP_ITSELF(identity); namespace paddle { namespace framework { diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index b8c779f4e5..643f875491 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -105,7 +105,10 @@ class Tensor { template inline Tensor Slice(const int& begin_idx, const int& end_idx) const; - platform::Place place() const { return holder_->place(); } + platform::Place place() const { + PADDLE_ENFORCE_NOT_NULL(holder_, "Tensor get place() must contains holder"); + return holder_->place(); + } private: template diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 20e562c7d3..0ba598823b 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -68,4 +68,4 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) -op_library(identity_op SRCS identity_op.cc identity_op.cu DEPS net_op) +op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) diff --git a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc deleted file mode 100644 index cac44020bc..0000000000 --- a/paddle/operators/identity_op.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/identity_op.h" -#include "paddle/operators/net_op.h" - -namespace paddle { -namespace operators { - -class IdentityOp : public framework::OperatorWithKernel { - public: - IdentityOp(const std::string &type, const VarNameMap &inputs, - const VarNameMap &outputs, const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto *in = ctx.Input("X"); - auto *out = ctx.Output("Out"); - out->Resize(in->dims()); - } -}; - -class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { - public: - IdentityOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The input tensor of identity operator.").NotInGradient(); - AddOutput("Out", "The output tensor of identity operator.").NotInGradient(); - AddComment(R"DOC(Identity operator - -The equation is: Out = X -)DOC"); - } -}; - -// Identity Op's gradient is identity op, too. -// Grad(Out=identity_op(X)) => Grad(X) = identity_op(Grad(Out)) -class IdentityGradOp : public NetOp { - public: - IdentityGradOp(const std::string &type, const VarNameMap &inputs, - const VarNameMap &outputs, - const framework::AttributeMap &attrs) - : NetOp(type, inputs, outputs, attrs) { - AddOp(framework::OpRegistry::CreateOp( - "identity", {{"X", {Input(framework::GradVarName("Out"))}}}, - {{"Out", {Output(framework::GradVarName("X"))}}}, {})); - CompleteAddOp(false); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP(identity, ops::IdentityOp, ops::IdentityOpMaker, identity_grad, - ops::IdentityGradOp); -REGISTER_OP_CPU_KERNEL(identity, ops::IdentityKernel); diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index a7d7105110..7e3779ed2e 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -68,10 +68,15 @@ std::string NetOp::DebugString() const { bool NetOp::IsNetOp() const { return true; } std::vector NetOp::OutputVars(bool has_intermediate) const { + std::vector all; + for (auto& pair : this->outputs_) { + for (auto& var_name : pair.second) { + all.push_back(var_name); + } + } if (has_intermediate) { - return this->outputs_.at(kAll); + return all; } - auto& all = this->outputs_.at(kAll); std::vector ret_val; for (auto& each : all) { if (!Contains(intermediate_outputs_, each)) { diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc new file mode 100644 index 0000000000..3b18ff078e --- /dev/null +++ b/paddle/operators/scale_op.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/scale_op.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +class ScaleOp : public framework::OperatorWithKernel { + public: + ScaleOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto *in = ctx.Input("X"); + auto *out = ctx.Output("Out"); + out->Resize(in->dims()); + } +}; + +template +class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of scale operator.").NotInGradient(); + AddOutput("Out", "The output tensor of scale operator.").NotInGradient(); + AddComment(R"DOC(Scale operator + +The equation is: Out = scale*X +)DOC"); + AddAttr("scale", "scale of scale operator.").SetDefault(1.0); + } +}; + +// Identity Op's gradient is identity op, too. +// Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out)) +template +class ScaleGradOp : public NetOp { + public: + ScaleGradOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(framework::OpRegistry::CreateOp( + "scale", {{"X", {Input(framework::GradVarName("Out"))}}}, + {{"Out", {Output(framework::GradVarName("X"))}}}, + {{"scale", GetAttr("scale")}})); + CompleteAddOp(false); + } +}; + +// identity is a alias of scale op. This is also a example for creating a alias +// operator. +template +class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { + public: + IdentityOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "input tensor of identity op"); + AddOutput("Out", "output tensor of identity op"); + AddComment("identity operator. 
Just a alias of scale op which scale = 1.0"); + } +}; + +template +class IdentityOp : public NetOp { + public: + IdentityOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(framework::OpRegistry::CreateOp( + "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Out")}}}, + {{"scale", static_cast(1)}})); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(scale, ops::ScaleOp, ops::ScaleOpMaker, scale_grad, + ops::ScaleGradOp); +REGISTER_OP_CPU_KERNEL(scale, + ops::ScaleKernel); +REGISTER_OP_WITHOUT_GRADIENT(identity, ops::IdentityOp, + ops::IdentityOpMaker); diff --git a/paddle/operators/identity_op.cu b/paddle/operators/scale_op.cu similarity index 81% rename from paddle/operators/identity_op.cu rename to paddle/operators/scale_op.cu index 3053104bbe..63efbe0da8 100644 --- a/paddle/operators/identity_op.cu +++ b/paddle/operators/scale_op.cu @@ -12,6 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/identity_op.h" +#include "paddle/operators/scale_op.h" -REGISTER_OP_GPU_KERNEL(identity, paddle::operators::IdentityKernel); +REGISTER_OP_GPU_KERNEL( + scale, paddle::operators::ScaleKernel); diff --git a/paddle/operators/identity_op.h b/paddle/operators/scale_op.h similarity index 66% rename from paddle/operators/identity_op.h rename to paddle/operators/scale_op.h index 14a832257b..aea64f1b04 100644 --- a/paddle/operators/identity_op.h +++ b/paddle/operators/scale_op.h @@ -14,17 +14,25 @@ #pragma once +#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/memory/memcpy.h" + namespace paddle { namespace operators { -template -class IdentityKernel : public framework::OpKernel { +template +class ScaleKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& context) const { auto* tensor = context.Output("Out"); auto* in = context.Input("X"); - tensor->CopyFrom(*in, in->place()); + tensor->mutable_data(in->place()); + + auto scale = static_cast(context.op_.GetAttr("scale")); + + auto eigen_out = framework::EigenVector::Flatten(*tensor); + auto eigen_in = framework::EigenVector::Flatten(*in); + auto& dev = context.GetEigenDevice(); + eigen_out.device(dev) = scale * eigen_in; } }; diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index cf7baa5556..0e8811bfe7 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,4 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) -py_test(test_identity_op SRCS test_identity_op.py) +py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 8b8e2f444b..c22c6f8831 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -160,8 +160,13 @@ class GradientChecker(unittest.TestCase): grad_tensor.set(data, place) # run backward op - for name in backward_op.outputs(): + backward_outs = backward_op.outputs() + backward_names = [ 
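        # outputs() returns a mapping from each output slot to a list of
        # variable names, so the names are flattened here before the
        # corresponding scope variables are created below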
+ item for key in backward_outs for item in backward_outs[key] + ] + for name in backward_names: scope.new_var(name) + backward_op.infer_shape(scope) backward_op.run(scope, ctx) diff --git a/python/paddle/v2/framework/tests/test_identity_op.py b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py similarity index 51% rename from python/paddle/v2/framework/tests/test_identity_op.py rename to python/paddle/v2/framework/tests/test_scale_and_identity_op.py index 181d9c0c21..69b301c376 100644 --- a/python/paddle/v2/framework/tests/test_identity_op.py +++ b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py @@ -2,6 +2,7 @@ import unittest from op_test_util import OpTestMeta from gradient_checker import GradientChecker, create_op import numpy as np +from paddle.v2.framework.op import Operator class IdentityTest(unittest.TestCase): @@ -20,5 +21,23 @@ class IdentityGradOpTest(GradientChecker): self.check_grad(op, inputs, set("X"), "Out") +class ScaleTest(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "scale" + self.inputs = {'X': np.random.random((32, 784)).astype("float32")} + self.attrs = {'scale': -2.3} + self.outputs = {'Out': self.inputs['X'] * self.attrs['scale']} + + +class ScaleGradTest(GradientChecker): + def test_normal(self): + op = Operator("scale", X="X", Out="Out", scale=3.2) + self.check_grad(op, + {"X": np.random.random((10, 10)).astype("float32")}, + set("X"), "Out") + + if __name__ == '__main__': unittest.main() From 2377d719473543da3a6129de3c6c32667bdb9f18 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 21 Aug 2017 09:28:03 +0800 Subject: [PATCH 046/170] Add3DPooling --- paddle/cuda/include/hl_cnn.h | 198 ++++++++- paddle/cuda/include/stub/hl_cnn_stub.h | 90 ++++ paddle/cuda/src/hl_cuda_cnn.cu | 427 ++++++++++++++++++- paddle/gserver/layers/Pool3DLayer.cpp | 198 +++++++++ paddle/gserver/layers/Pool3DLayer.h | 48 +++ paddle/gserver/tests/test_LayerGrad.cpp | 69 ++++ paddle/math/Matrix.cpp | 502 +++++++++++++++++++++++ paddle/math/Matrix.h | 254 +++++++++++- paddle/math/tests/test_matrixCompare.cpp | 204 +++++++++ paddle/parameter/Argument.cpp | 2 + paddle/parameter/Argument.h | 8 +- proto/ModelConfig.proto | 12 + 12 files changed, 1998 insertions(+), 14 deletions(-) create mode 100644 paddle/gserver/layers/Pool3DLayer.cpp create mode 100644 paddle/gserver/layers/Pool3DLayer.h diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 9f84db72da..e9687d0a58 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -173,6 +173,202 @@ extern void hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride); +/** + * @brief Maximum pool forward. + * + * @param[in] frameCnt batch size of input image. + * @param[in] inputData input data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[out] tgtData output data. 
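 * (note: pooledD/pooledH/pooledW are precomputed by the caller; Pool3DLayer
 *  derives them with outputSize(img, size, pad, stride, caffeMode = false),
 *  i.e. pooled = (img + 2 * pad - size + stride - 1) / stride + 1)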
+ * @param[in] tgtStride stride between output data samples. + * + */ +extern void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride); + +/** + * @brief Maximum pool backward. + * + * @param[in] frameCnt batch size of input image. + * @param[in] inputData input data. + * @param[out] outData output data. + * @param[out] outGrad output grad data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] scaleA scale. + * @param[in] scaleB scale. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[out] targetGrad output grad. + * @param[in] outStride stride between output data samples. + * + */ +extern void hl_maxpool3D_backward(const int frameCnt, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride); + +/** + * @brief Averge pool forward. + * + * @param[in] frameCnt batch size of input image. + * @param[in] inputData input data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[out] tgtData output data. + * @param[in] tgtStride stride between output data samples. + * + */ +extern void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride); + +/** + * @brief Maximum pool backward. + * + * @param[in] frameCnt batch size of input image. 
+ * @param[in] outGrad output grad data. + * @param[in] channels number of channel. + * @param[in] depth image depth. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledD output image depth. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeZ depth of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] sizeX width of pooling window. + * @param[in] strideD pooling stride depth. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingD padding depth. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[in] scaleA scale. + * @param[in] scaleB scale. + * @param[out] backGrad output grad. + * @param[in] outStride stride between output data samples. + * + */ +extern void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride); + /** * @brief Bilinear interpolation forward. * @@ -275,4 +471,4 @@ extern void hl_maxout_backward(real* inGrad, size_t featLen, size_t groups); -#endif /* HL_CNN_H_ */ +#endif // HL_CNN_H_ diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 2bbb9fa8df..28f61781be 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -87,6 +87,96 @@ inline void hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride) {} +inline void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) {} + +inline void hl_maxpool3D_backward(const int frameCnt, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride) {} + +inline void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) {} + +inline void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int 
sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride) {} + inline void hl_bilinear_forward(const real* inData, const size_t inImgH, const size_t inImgW, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index aac19b1ea5..458c347728 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -353,6 +350,430 @@ void hl_avgpool_backward(const int frameCnt, CHECK_SYNC("hl_avgpool_backward failed"); } +///////////////// +__global__ void KeMaxPool3DForward(const int nthreads, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int ksizeD, + const int ksizeH, + const int ksizeW, + const int strideD, + const int strideH, + const int strideW, + const int offsetD, + const int offsetH, + const int offsetW, + real* tgtData, + const int tgtStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int pw = index % pooledW; + int ph = (index / pooledW) % pooledH; + int pd = (index / pooledW / pooledH) % pooledD; + int c = (index / pooledW / pooledH / pooledD) % channels; + int frameNum = index / pooledW / pooledH / pooledD / channels; + int dstart = pd * strideD - offsetD; + int hstart = ph * strideH - offsetH; + int wstart = pw * strideW - offsetW; + int dend = min(dstart + ksizeD, depth); + int hend = min(hstart + ksizeH, height); + int wend = min(wstart + ksizeW, width); + dstart = max(dstart, 0); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + real maxval = -FLT_MAX; + inputData += (frameNum * channels + c) * depth * height * width; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + if (maxval < inputData[(d * height + h) * width + w]) + maxval = inputData[(d * height + h) * width + w]; + } + } + } + int tgtIndex = + index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; + tgtData[tgtIndex] = maxval; + } +} + +void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) { + int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KeMaxPool3DForward<<>>(num_kernels, + inputData, + channels, + depth, + height, + width, + pooledD, + pooledH, + pooledW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + 
paddingW, + tgtData, + tgtStride); + CHECK_SYNC("hl_maxpool3D_forward failed"); +} + +__global__ void KeMaxPool3DBackward(const int nthreads, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + // find out the local index + // find out the local offset + int offsetW = index % width + padW; + int offsetH = (index / width) % height + padH; + int offsetD = (index / width / height) % depth + padD; + int offsetC = (index / width / height / depth) % channels; + int frameNum = index / width / height / depth / channels; + + int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; + int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; + int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; + int pdend = min(offsetD / strideD + 1, pooledD); + int phend = min(offsetH / strideH + 1, pooledH); + int pwend = min(offsetW / strideW + 1, pooledW); + + real gradient = 0; + real input = inputData[index]; + + outData += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + outGrad += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + for (int pd = pdstart; pd < pdend; ++pd) { + for (int ph = phstart; ph < phend; ++ph) { + for (int pw = pwstart; pw < pwend; ++pw) { + if (input == outData[(pd * pooledH + ph) * pooledW + pw]) + gradient += outGrad[(pd * pooledH + ph) * pooledW + pw]; + } + } + } + targetGrad[index] = scaleA * gradient + scaleB * targetGrad[index]; + } +} + +void hl_maxpool3D_backward(const int frameCnt, + const real* inputData, + const real* outData, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int outputD, + const int outputH, + const int outputW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + const int outStride) { + int num_kernels = depth * height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + + KeMaxPool3DBackward<<>>(num_kernels, + inputData, + outData, + outGrad, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleA, + scaleB, + targetGrad, + outStride); + CHECK_SYNC("hl_maxpool3D_backward"); +} + +__global__ void KeAvgPool3DForward(const int nthreads, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real* tgtData, + const int tgtStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int pw = index % pooledW; + int ph = (index 
/ pooledW) % pooledH; + int pd = (index / pooledW / pooledH) % pooledD; + int c = (index / pooledW / pooledH / pooledD) % channels; + int frameNum = index / pooledW / pooledH / pooledD / channels; + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int dend = min(dstart + sizeZ, depth + padD); + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = max(dstart, 0); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + dend = min(dend, depth); + hend = min(hend, height); + wend = min(wend, width); + + real aveval = 0; + inputData += (frameNum * channels + c) * depth * height * width; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + aveval += inputData[(d * height + h) * width + w]; + } + } + } + int tgtIndex = + index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; + tgtData[tgtIndex] = aveval / pool_size; + } +} + +void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) { + int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + KeAvgPool3DForward<<>>(num_kernels, + inputData, + channels, + depth, + height, + width, + pooledD, + pooledH, + pooledW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + tgtData, + tgtStride); + CHECK_SYNC("hl_avgpool3D_forward failed"); +} + +__global__ void KeAvgPool3DBackward(const int nthreads, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real scaleA, + real scaleB, + real* tgtGrad, + const int outStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int offsetW = index % width + padW; + int offsetH = (index / width) % height + padH; + int offsetD = (index / width / height) % depth + padD; + int offsetC = (index / width / height / depth) % channels; + int frameNum = index / width / height / depth / channels; + + int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; + int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; + int pwstart = (offsetW < sizeX) ? 
0 : (offsetW - sizeX) / strideW + 1; + int pdend = min(offsetD / strideD + 1, pooledD); + int phend = min(offsetH / strideH + 1, pooledH); + int pwend = min(offsetW / strideW + 1, pooledW); + + real gradient = 0; + outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW; + + for (int pd = pdstart; pd < pdend; ++pd) { + for (int ph = phstart; ph < phend; ++ph) { + for (int pw = pwstart; pw < pwend; ++pw) { + // figure out the pooling size + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int dend = min(dstart + sizeZ, depth + padD); + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart); + gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize; + } + } + } + tgtGrad[index] = scaleA * gradient + scaleB * tgtGrad[index]; + } +} + +void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int outputD, + const int outputH, + const int outputW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride) { + int num_kernels = depth * height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + + KeAvgPool3DBackward<<>>(num_kernels, + outGrad, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleA, + scaleB, + backGrad, + outStride); + CHECK_SYNC("hl_avgpool3D_backward failed"); +} +///////////////// + __global__ void KeBilinearInterpFw(const real* in, const size_t inImgH, const size_t inImgW, diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp new file mode 100644 index 0000000000..fc6b9bdd2f --- /dev/null +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -0,0 +1,198 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Pool3DLayer.h" +#include "PoolProjectionLayer.h" +#include "paddle/utils/Logging.h" + +namespace paddle { + +REGISTER_LAYER(pool3d, Pool3DLayer); + +bool Pool3DLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + /* the size of inputs for pool-layer is 1 */ + CHECK_EQ(config_.inputs_size(), 1); + + const PoolConfig& conf = config_.inputs(0).pool_conf(); + poolType_ = conf.pool_type(); + channels_ = conf.channels(); + + sizeX_ = conf.size_x(); + sizeY_ = conf.size_y(); + sizeZ_ = conf.size_z(); + + strideW_ = conf.stride(); + strideH_ = conf.stride_y(); + strideD_ = conf.stride_z(); + + imgSizeW_ = conf.img_size(); + imgSizeH_ = conf.img_size_y(); + imgSizeD_ = conf.img_size_z(); + + paddingW_ = conf.padding(); + paddingH_ = conf.padding_y(); + paddingD_ = conf.padding_z(); + + outputW_ = conf.output_x(); + outputH_ = conf.output_y(); + outputD_ = conf.output_z(); + + return true; +} + +size_t Pool3DLayer::getSize() { + CHECK_EQ(inputLayers_.size(), 1UL); + + size_t layerSize = 0; + // imgSizeD_ = inputLayers_[0]->getOutput().getFrameDepth(); + // imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); + // imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); + if (imgSizeH_ == 0) { + // imgSizeH_ = imgSizeY_; + } + if (imgSizeW_ == 0) { + // imgSizeW_ = imgSize_; + } + outputD_ = outputSize(imgSizeD_, + sizeZ_, + paddingD_, + strideD_, + /* caffeMode */ false); + outputH_ = outputSize(imgSizeH_, + sizeY_, + paddingH_, + strideH_, + /* caffeMode */ false); + outputW_ = outputSize(imgSizeW_, + sizeX_, + paddingW_, + strideW_, + /* caffeMode */ false); + + layerSize = outputD_ * outputH_ * outputW_ * channels_; + getOutput().setFrameHeight(outputH_); + getOutput().setFrameWidth(outputW_); + getOutput().setFrameDepth(outputD_); + return layerSize; +} + +void Pool3DLayer::forward(PassType passType) { + Layer::forward(passType); + const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); + int batchSize = inMat->getHeight(); + int outWidth = getSize(); + resetOutput(batchSize, outWidth); + const MatrixPtr outMat = getOutputValue(); + + if (poolType_ == "avg") { + outMat->avgPool3DForward(*inMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + channels_, + sizeZ_, + sizeY_, + sizeX_, + strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + paddingD_, + paddingH_, + paddingW_); + } else if (poolType_ == "max") { + outMat->maxPool3DForward(*inMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + channels_, + sizeZ_, + sizeY_, + sizeX_, + strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + paddingD_, + paddingH_, + paddingW_); + } else { + LOG(FATAL) << "Unknown pool type: " << poolType_; + } + forwardActivation(); +} + +void Pool3DLayer::backward(const UpdateCallback& callback) { + backwardActivation(); + + (void)callback; + if (NULL == getInputGrad(0)) return; + MatrixPtr inMat = inputLayers_[0]->getOutputValue(); + MatrixPtr inGradMat = inputLayers_[0]->getOutputGrad(); + MatrixPtr outMat = getOutputValue(); + MatrixPtr outGradMat = getOutputGrad(); + + if (poolType_ == "avg") { + inGradMat->avgPool3DBackward(*outGradMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + sizeZ_, + sizeY_, + sizeZ_, + strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + 1, + 1, + paddingD_, + paddingH_, + paddingW_); + } else if (poolType_ == "max") { + inGradMat->maxPool3DBackward(*inMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + *outGradMat, + *outMat, + sizeZ_, + sizeY_, + sizeZ_, + 
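        // note: both pooling branches above pass (sizeZ_, sizeY_, sizeZ_) as the
        // window size; the Matrix 3-D pooling API takes (sizeZ, sizeY, sizeX),
        // so the last of the three is presumably meant to be sizeX_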
strideD_, + strideH_, + strideW_, + outputD_, + outputH_, + outputW_, + 1, + 1, + paddingD_, + paddingH_, + paddingW_); + } else { + LOG(FATAL) << "Unknown pool type: " << poolType_; + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/Pool3DLayer.h b/paddle/gserver/layers/Pool3DLayer.h new file mode 100644 index 0000000000..afc65ac2b0 --- /dev/null +++ b/paddle/gserver/layers/Pool3DLayer.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "Layer.h" +#include "paddle/math/MathUtils.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief Basic parent layer of pooling + * Pools the input within regions + */ +class Pool3DLayer : public Layer { +public: + explicit Pool3DLayer(const LayerConfig& config) : Layer(config) {} + ~Pool3DLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; + size_t getSize(); + +protected: + int channels_; + int sizeX_, sizeY_, sizeZ_; + int strideW_, strideH_, strideD_; + int paddingW_, paddingH_, paddingD_; + int imgSizeW_, imgSizeH_, imgSizeD_; + int outputW_, outputH_, outputD_; + std::string poolType_; +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..43fb255ae0 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1206,6 +1206,75 @@ TEST(Layer, PoolLayer) { #endif } +void setPool3DConfig(TestConfig* config, + PoolConfig* pool, + const string& poolType) { + // filter size + const int NUM_FILTERS = 16; + const int FILTER_SIZE = 3; + const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Z = 3; + const int CHANNELS = 16; + + (*config).biasSize = 0; + (*config).layerConfig.set_type("pool3d"); + (*config).layerConfig.set_num_filters(NUM_FILTERS); + + int kw = FILTER_SIZE, kh = FILTER_SIZE_Y, kd = FILTER_SIZE_Z; + int pw = 0, ph = 0, pd = 0; + int sw = 2, sh = 2, sd = 2; + + pool->set_pool_type(poolType); + pool->set_pool_type("avg"); + pool->set_channels(CHANNELS); + pool->set_size_x(kw); + pool->set_size_y(kh); + pool->set_size_z(kd); + pool->set_padding(0); + pool->set_padding_y(0); + pool->set_padding_z(0); + pool->set_stride(sw); + pool->set_stride_y(sh); + pool->set_stride_z(sd); + pool->set_start(0); + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); + int od = outputSize(pool->img_size_z(), kd, pd, sd, /* caffeMode */ false); + pool->set_output_x(ow); + pool->set_output_y(oh); + pool->set_output_z(od); +} + +void testPool3DLayer(const string& poolType, bool trans, bool useGpu) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 11664, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = 
input->mutable_pool_conf(); + + const int IMAGE_SIZE = 9; + const int IMAGE_SIZE_Y = 9; + const int IMAGE_SIZE_Z = 9; + + pool->set_img_size(IMAGE_SIZE); + pool->set_img_size_y(IMAGE_SIZE_Y); + pool->set_img_size_z(IMAGE_SIZE_Z); + + setPool3DConfig(&config, pool, poolType); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + testLayerGrad(config, "pool3d", 100, trans, useGpu); +} + +TEST(Layer, Pool3DLayer) { + testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false); + testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true); + testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true); +#endif +} + void testSppLayer(const string& poolType, const int pyramidHeight, bool trans, diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..e7f1489b8b 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1190,6 +1190,224 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, outGrad.getStride()); } +void GpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + + real* inputData = inputMat.getData(); + size_t num = inputMat.getHeight(); + size_t width = imgSizeW; + size_t height = imgSizeH; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == outputD * outputH * outputW * channels); + + hl_maxpool3D_forward(num, + inputData, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + data_, + getStride()); +} + +void GpuMatrix::maxPool3DBackward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true && + outV.useGpu_ == true) + << "Matrix type are not equal"; + + real* inputData = inputMat.getData(); + real* outData = outV.getData(); + real* outDiff = outGrad.getData(); + size_t frameNum = inputMat.getHeight(); + size_t channels = outV.getWidth() / outputD / outputH / outputW; + size_t width = imgSizeW; + size_t height = imgSizeH; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == depth * width * height * channels); + CHECK(outGrad.getHeight() == outV.getHeight() && + outGrad.getWidth() == outV.getWidth()); + + hl_maxpool3D_backward(frameNum, + inputData, + outData, + outDiff, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleTargets, + scaleOutput, + data_, + outGrad.getStride()); +} + +void GpuMatrix::avgPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + 
size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + + real* inputData = inputMat.getData(); + size_t frameNum = inputMat.getHeight(); + size_t height = imgSizeH; + size_t width = imgSizeW; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == outputD * outputH * outputW * channels); + + hl_avgpool3D_forward(frameNum, + inputData, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + data_, + getStride()); +} + +void GpuMatrix::avgPool3DBackward(Matrix& outGrad, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; + + real* outDiff = outGrad.getData(); + size_t frameNum = outGrad.getHeight(); + size_t channels = outGrad.getWidth() / outputD / outputH / outputW; + size_t height = imgSizeH; + size_t width = imgSizeW; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == width_); + CHECK(height_ == outGrad.getHeight()); + CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels); + + hl_avgpool3D_backward(frameNum, + outDiff, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleTargets, + scaleOutput, + data_, + outGrad.getStride()); +} + void GpuMatrix::maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { @@ -1930,6 +2148,290 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } } +void CpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + real* inputData = inputMat.getData(); + real* outData = data_; + size_t num = inputMat.getHeight(); + size_t inWidth = imgSizeW; + size_t inHeight = imgSizeH; + size_t inDepth = imgSizeD; + CHECK(inHeight * inWidth * inDepth == inputMat.getWidth() / channels); + CHECK_EQ(num, this->getHeight()); + CHECK_EQ(channels * outputH * outputW * outputD, this->getWidth()); + size_t outStride = getStride(); + + /* initialize the data_ */ + for (size_t i = 0; i < height_; i++) { + for (size_t j = 0; j < width_; j++) { + outData[(i)*outStride + j] = -(real)FLT_MAX; + } + } + + /* pool max one by one */ + for (size_t n = 0; n < num; ++n) { // frame by frame + if (!isContiguous()) { + outData = data_ + n * outStride; + } + for (size_t c = 0; c < channels; ++c) { // channel by channel + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - 
paddingW; + int dend = std::min(dstart + sizeZ, inDepth); + int hend = std::min(hstart + sizeY, inHeight); + int wend = std::min(wstart + sizeX, inWidth); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[(pd * outputH + ph) * outputW + pw] = + std::max(outData[(pd * outputH + ph) * outputW + pw], + inputData[(d * inHeight + h) * inWidth + w]); + } + } + } + } + } + } + // compute offset + inputData += inDepth * inHeight * inWidth; + outData += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + size_t num = image.getHeight(); + size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); + CHECK(image.getWidth() == imgSizeD * imgSizeH * imgSizeW * channels); + CHECK(image.getHeight() == height_ && image.getWidth() == width_); + CHECK(outV.getHeight() == outGrad.getHeight() && + outV.getWidth() == outGrad.getWidth()); + + real* tgtGrad = data_; + real* inData = image.getData(); + real* otData = outV.getData(); + real* otGrad = outGrad.getData(); + + size_t outStride = outV.getStride(); + real* origOutData = otData; + real* origOutGrad = otGrad; + + for (size_t n = 0; n < num; ++n) { + if (!outV.isContiguous()) { + otData = origOutData + n * outStride; + otGrad = origOutGrad + n * outStride; + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, imgSizeD); + int hend = std::min(hstart + sizeY, imgSizeH); + int wend = std::min(wstart + sizeX, imgSizeW); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + for (int d = 0; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + tgtGrad[(d * imgSizeH + h) * imgSizeW + w] = + scaleTargets * + tgtGrad[(d * imgSizeH + h) * imgSizeW + w] + + scaleOutput * otGrad[(pd * outputH + ph) * outputW + pw] * + (inData[(d * imgSizeH + h) * imgSizeW + w] == + otData[(pd * outputH + ph) * outputW + pw]); + } + } + } + } + } + } + // offset + inData += imgSizeD * imgSizeH * imgSizeW; + tgtGrad += imgSizeD * imgSizeH * imgSizeW; + otData += outputD * outputH * outputW; + otGrad += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + // The main loop + size_t num = input.getHeight(); + size_t inDepth = imgSizeD; + size_t inHeight = imgSizeH; + size_t inWidth = imgSizeW; + CHECK(inDepth * inHeight * inWidth * channels == input.getWidth()); + CHECK(outputD * outputH * outputW * channels * num == height_ * width_); + 
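+  // Each output cell averages the input over a sizeZ x sizeY x sizeX window.
+  // Note that poolSize is computed from the padded window bounds before they
+  // are clamped to the real volume, so padded cells count toward the divisor
+  // even though they contribute nothing to the sum.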
real* tgtData = data_; + real* inData = input.getData(); + + for (size_t n = 0; n < num; ++n) { + if (!isContiguous()) { + tgtData = data_ + n * getStride(); + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, inDepth + paddingD); + int hend = std::min(hstart + sizeY, inHeight + paddingH); + int wend = std::min(wstart + sizeX, inWidth + paddingW); + int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + dend = std::min(dend, static_cast(inDepth)); + hend = std::min(hend, static_cast(inHeight)); + wend = std::min(wend, static_cast(inWidth)); + + CHECK(poolSize); + tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + tgtData[(pd * outputH + ph) * outputW + pw] += + inData[(d * inHeight + h) * inWidth + w]; + } + } + } + tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize; + } + } + } + // compute offset + inData += inDepth * inHeight * inWidth; + tgtData += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + size_t num = input.getHeight(); + size_t channels = input.getWidth() / outputD / outputH / outputW; + CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); + real* inData = input.getData(); + real* outData = getData(); + + for (size_t n = 0; n < num; ++n) { + if (!input.isContiguous()) { + inData = input.getData() + n * input.getStride(); + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, imgSizeD + paddingD); + int hend = std::min(hstart + sizeY, imgSizeH + paddingH); + int wend = std::min(wstart + sizeX, imgSizeW + paddingW); + int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + dend = std::min(dend, static_cast(imgSizeD)); + hend = std::min(hend, static_cast(imgSizeH)); + wend = std::min(wend, static_cast(imgSizeW)); + CHECK(poolSize); + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[(d * imgSizeH + h) * imgSizeW + w] += + inData[(pd * outputH + ph) * outputW + pw] / poolSize; + } + } + } + } + } + } + // offset + outData += imgSizeD * imgSizeH * imgSizeW; + inData += outputD * outputH * outputW; + } + } +} + /** * Input: one or more sequences. Each sequence contains some instances. * Output: output size is the number of input sequences (NOT input instances). 
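For readers following the CPU kernels above, the snippet below is a minimal standalone sketch (not part of this patch) of the same 3-D max-pooling loop for a single channel. The function name maxPool3dRef, the flat row-major D x H x W layout and all parameter names are illustrative assumptions; the kernels in Matrix.cpp additionally iterate over batch frames and channels and honour the matrix stride.

#include <algorithm>
#include <cfloat>
#include <vector>

// Naive single-channel 3-D max pooling over a flat row-major D x H x W volume.
// Window start = index * stride - padding, end clamped to the volume, and the
// maximum over the surviving cells is taken, following the loop structure of
// CpuMatrix::maxPool3DForward above.
std::vector<float> maxPool3dRef(const std::vector<float>& in,
                                int D, int H, int W,      // input extent
                                int kD, int kH, int kW,   // kernel extent
                                int sD, int sH, int sW,   // strides
                                int pD, int pH, int pW,   // paddings
                                int oD, int oH, int oW) { // precomputed output extent
  std::vector<float> out(oD * oH * oW, -FLT_MAX);
  for (int pd = 0; pd < oD; ++pd) {
    for (int ph = 0; ph < oH; ++ph) {
      for (int pw = 0; pw < oW; ++pw) {
        int d0 = std::max(pd * sD - pD, 0), d1 = std::min(pd * sD - pD + kD, D);
        int h0 = std::max(ph * sH - pH, 0), h1 = std::min(ph * sH - pH + kH, H);
        int w0 = std::max(pw * sW - pW, 0), w1 = std::min(pw * sW - pW + kW, W);
        float& dst = out[(pd * oH + ph) * oW + pw];
        for (int d = d0; d < d1; ++d)
          for (int h = h0; h < h1; ++h)
            for (int w = w0; w < w1; ++w)
              dst = std::max(dst, in[(d * H + h) * W + w]);
      }
    }
  }
  return out;
}

The output extents are assumed to be computed beforehand, e.g. with the outputSize() helper that the layer and the tests above rely on.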
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index bb802bbb2c..f1534c5ea0 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -928,15 +928,102 @@ public: size_t paddingW) { LOG(FATAL) << "Not implemeted"; } - /** - * Input: one or more sequences. Each sequence contains some instances. - * - * Output: output size is the number of input sequences (NOT input - * instances). - * - * output[i] is set to max_input[i]. + * Pooling 3D forward operation, pick out the largest element + * in the sizeX of value */ + virtual void maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + /** + * Input: one or more sequences. Each sequence contains some instances. + * + * Output: output size is the number of input sequences (NOT input + * instances). + * + * output[i] is set to max_input[i]. 
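+   * The index vector records, for each output element, the input row the
+   * maximum was taken from, so the backward pass can route gradients to it.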
+ */ virtual void maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { @@ -1348,6 +1435,83 @@ public: size_t paddingH, size_t paddingW); + ///////////////////////// + void maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); + void maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index); @@ -1506,6 +1670,82 @@ public: real scaleOutput, size_t paddingH, size_t paddingW); + ////////////////////// + void maxPool3DForward(Matrix& inputMat, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void maxPool3DBackward(Matrix& image, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DForward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t outputD, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingD, + size_t paddingH, + size_t paddingW); void maxSequenceForward(Matrix& input, const IVector& sequence, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index d77478f345..7a961d2751 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include "TensorCheck.h" +#include "paddle/math/MathUtils.h" #include "paddle/math/Matrix.h" #include "paddle/math/SparseMatrix.h" #include "paddle/testing/TestUtil.h" @@ -1203,4 +1204,207 @@ TEST(Matrix, warpCTC) { } } +///// +void testMatrixPool3D(int depth, int height, int width) { + int channel = 3; + int filterX = 3, filterY = 4, filterZ = 5; + int strideX = 2, strideY = 2, strideZ = 2; + int padX = 1, padY = 1, padZ = 1; + + MatrixPtr cpuImage = + std::make_shared(1, channel * depth * height * width); + MatrixPtr gpuImage = + std::make_shared(1, channel * depth * height * width); + + int outD = outputSize(depth, filterZ, padZ, strideZ, true); + int outH = outputSize(height, filterY, padZ, strideY, true); + int outW = outputSize(width, filterX, padZ, strideX, true); + + int colBufWidth = outD * outH * outW; + MatrixPtr cpuOutput = std::make_shared(1, channel * colBufWidth); + MatrixPtr gpuOutput = std::make_shared(1, channel * colBufWidth); + + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + // std::cout << "test maxPool3DForward...\n"; + cpuOutput->maxPool3DForward(*cpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + gpuOutput->maxPool3DForward(*gpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + TensorCheckErr(*cpuOutput, *gpuOutput); + + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + // std::cout << "test avgPool3DForward...\n"; + cpuOutput->avgPool3DForward(*cpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + + gpuOutput->avgPool3DForward(*gpuImage, + depth, + height, + width, + channel, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + padZ, + padY, + padX); + TensorCheckErr(*cpuOutput, *gpuOutput); + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + cpuOutput->randomizeUniform(); + gpuOutput->copyFrom(*cpuOutput); + // std::cout << "test avgPool3DBackward...\n"; + cpuImage->avgPool3DBackward(*cpuOutput, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + + gpuImage->avgPool3DBackward(*gpuOutput, + depth, + height, + width, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + TensorCheckErr(*cpuImage, *gpuImage); + + cpuImage->randomizeUniform(); + gpuImage->copyFrom(*cpuImage); + cpuOutput->randomizeUniform(); + gpuOutput->copyFrom(*cpuOutput); + // std::cout << "test maxPool3DBackward...\n"; + cpuImage->maxPool3DBackward(*cpuImage, + depth, + height, + width, + *cpuOutput, + *cpuOutput, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + + gpuImage->maxPool3DBackward(*gpuImage, + depth, + height, + width, + *gpuOutput, + *gpuOutput, + filterZ, + filterY, + filterX, + strideZ, + strideY, + strideX, + outD, + outH, + outW, + 1, + 1, + padZ, + padY, + padX); + TensorCheckErr(*cpuImage, *gpuImage); +} + +TEST(Matrix, Pool3D) { + for (auto depth : {9, 16, 64, 128}) { + for (auto height : {9, 11, 128, 256}) { + for (auto width : {9, 32, 128}) { + VLOG(3) << "depth=" << depth << " height=" << height 
+ << " width=" << width; + testMatrixPool3D(depth, height, width); + } + } + } +} + #endif diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 0547ac93cd..77fd0c5890 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -186,6 +186,7 @@ void Argument::resizeAndCopyFrom(const Argument& src, resizeAndCopy(strs, src.strs, useGpu, stream); frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; } int32_t Argument::resizeAndCopyFrom(const Argument& src, @@ -206,6 +207,7 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, dataId = src.dataId; frameWidth = src.frameWidth; frameHeight = src.frameHeight; + frameDepth = src.frameDepth; if (!src.sequenceStartPositions) { // non-sequence input, copy samples directly diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index d8d7a4398f..ba3ad2fd4d 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -35,6 +32,7 @@ struct Argument { strs(nullptr), frameHeight(0), frameWidth(0), + frameDepth(0), sequenceStartPositions(nullptr), subSequenceStartPositions(nullptr), cpuSequenceDims(nullptr), @@ -64,6 +62,7 @@ struct Argument { allCount = argument.allCount; frameHeight = argument.frameHeight; frameWidth = argument.frameWidth; + frameDepth = argument.frameDepth; dataId = argument.dataId; } @@ -76,6 +75,7 @@ struct Argument { // A dataBatch includes batchSize frames, one frame maybe not only vector size_t frameHeight; size_t frameWidth; + size_t frameDepth; // If NULL, each position is treated independently. // Otherwise, its size should be #NumberOfSequences + 1. @@ -136,8 +136,10 @@ struct Argument { } size_t getFrameHeight() const { return frameHeight; } size_t getFrameWidth() const { return frameWidth; } + size_t getFrameDepth() const { return frameDepth; } void setFrameHeight(size_t h) { frameHeight = h; } void setFrameWidth(size_t w) { frameWidth = w; } + void setFrameDepth(size_t d) { frameDepth = d; } int64_t getNumSequences() const { return sequenceStartPositions ? 
sequenceStartPositions->getSize() - 1 diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..42cf10e9d3 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,6 +82,12 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; + + optional uint32 filter_size_z = 15 [ default = 1 ]; + optional uint32 padding_z = 16 [ default = 1 ]; + optional uint32 stride_z = 17 [ default = 1 ]; + optional uint32 output_z = 18 [ default = 1 ]; + optional uint32 img_size_z = 19 [ default = 1 ]; } message PoolConfig { @@ -124,6 +130,12 @@ message PoolConfig { // if not set, use padding optional uint32 padding_y = 13; + + optional uint32 size_z = 14 [ default = 1 ]; + optional uint32 stride_z = 15 [ default = 1 ]; + optional uint32 output_z = 16 [ default = 1 ]; + optional uint32 img_size_z = 17 [ default = 1 ]; + optional uint32 padding_z = 18 [ default = 1 ]; } message SppConfig { From d7b80f03b0064ac9db5db5f313bc381f9046f689 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Wed, 2 Aug 2017 11:29:46 -0700 Subject: [PATCH 047/170] Correctly handle width and height for some layers --- python/paddle/trainer/config_parser.py | 11 ++++--- .../paddle/trainer_config_helpers/layers.py | 29 +++++++++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 8d71629faa..b3d5ef95cc 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -338,7 +338,8 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, in_links_count += 1 layer_name = MakeLayerNameInParentSubmodel(name) layer = g_layer_map[layer_name] - ScatterAgentLayer(name=name, size=layer.size) + ScatterAgentLayer( + name=name, size=layer.size, width=layer.width, height=layer.height) pair = g_current_submodel.in_links.add() pair.layer_name = layer_name @@ -2197,8 +2198,8 @@ class MaxOutLayer(LayerBase): maxout_conf = self.config.inputs[0].maxout_conf parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf) out_channels = maxout_conf.image_conf.channels / maxout_conf.groups - self.set_cnn_layer(name, g_layer_map[input_layer.name].height, - g_layer_map[input_layer.name].width, out_channels) + self.set_cnn_layer(name, maxout_conf.image_conf.img_size_y, + maxout_conf.image_conf.img_size, out_channels) @config_layer('row_conv') @@ -2405,9 +2406,11 @@ class GatherAgentLayer(LayerBase): @config_layer('scatter_agent') class ScatterAgentLayer(LayerBase): - def __init__(self, name, size, device=None): + def __init__(self, name, size, width=None, height=None, device=None): super(ScatterAgentLayer, self).__init__( name, 'scatter_agent', size, inputs=[], device=device) + if height and width: + self.set_layer_height_width(height, width) @config_layer('multiplex') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index c9e3ded65c..dd6d1f7f8c 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -16,11 +16,13 @@ import functools import collections import inspect +import paddle.trainer.config_parser as cp from paddle.trainer.config_parser import * from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation from .evaluators import * -from .poolings import MaxPooling, AvgPooling, BasePoolingType +from .poolings import MaxPooling, AvgPooling, BasePoolingType, 
\ + CudnnAvgPooling, CudnnMaxPooling from .attrs import * from .default_decorators import * @@ -330,6 +332,14 @@ class LayerOutput(object): self.outputs = outputs self.reverse = reverse + @property + def width(self): + return cp.g_layer_map[self.full_name].width + + @property + def height(self): + return cp.g_layer_map[self.full_name].height + def set_input(self, input): """ Set the input for a memory layer. Can only be used for memory layer @@ -911,7 +921,13 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): width=width, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.DATA, size=size) + num_filters = None + if height is not None and width is not None: + num_filters = size / (width * height) + assert num_filters * width * height == size, \ + "size=%s width=%s height=%s" % (size, width, height) + + return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) @wrap_name_default("embedding") @@ -2571,6 +2587,10 @@ def img_pool_layer(input, assert input.num_filters is not None num_channels = input.num_filters + assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, + CudnnMaxPooling], \ + "only AvgPooling and MaxPooling are supported" + if pool_type is None: pool_type = MaxPooling() elif isinstance(pool_type, AvgPooling): @@ -2580,7 +2600,6 @@ def img_pool_layer(input, if ( isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ else pool_type.name - pool_size_y = pool_size if pool_size_y is None else pool_size_y stride_y = stride if stride_y is None else stride_y padding_y = padding if padding_y is None else padding_y @@ -4204,8 +4223,7 @@ def conv_operator(img, num_channels = img.num_filters assert isinstance(filter, LayerOutput) - if filter.size is not None: - filter.size = filter_size * filter_size_y * num_filters * num_channels + assert filter.size is not None opCls = ConvTransOperator if trans else ConvOperator @@ -4916,7 +4934,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): :return: LayerOutput object. :rtype: LayerOutput """ - assert input.layer_type == LayerType.CONV_LAYER assert isinstance(input.activation, LinearActivation) assert groups > 1 if num_channels is None: From 99af29e3f29f0392727bba312282e56a431dfc7b Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Mon, 21 Aug 2017 14:17:13 -0700 Subject: [PATCH 048/170] Fix error message for img_pool_layer --- python/paddle/trainer_config_helpers/layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index dd6d1f7f8c..be854c38f7 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2589,7 +2589,7 @@ def img_pool_layer(input, assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, CudnnMaxPooling], \ - "only AvgPooling and MaxPooling are supported" + "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" if pool_type is None: pool_type = MaxPooling() @@ -6236,11 +6236,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): @wrap_bias_attr_default() def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): """ - A layer applies a linear transformation to each element in each row of - the input matrix. For each element, the layer first re-scale it and then + A layer applies a linear transformation to each element in each row of + the input matrix. 
For each element, the layer first re-scale it and then adds a bias to it. - This layer is very like the SlopeInterceptLayer, except the scale and + This layer is very like the SlopeInterceptLayer, except the scale and bias are trainable. .. math:: From 118dd1494fbe3654da8f71c2245523e27616d475 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 21 Aug 2017 18:22:59 -0700 Subject: [PATCH 049/170] can run, for debug --- .../paddle/v2/framework/tests/CMakeLists.txt | 1 + python/paddle/v2/framework/tests/mnist.py | 73 +++++++++++++++++-- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ce57a07130..41682c8350 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,3 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) +py_test(mnist SRCS mnist.py) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 32a088ac28..d0c56c457d 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -2,7 +2,7 @@ import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator import numpy -BATCH_SIZE = 100 +BATCH_SIZE = 2 scope = core.Scope() place = core.CPUPlace() @@ -35,10 +35,15 @@ def data_layer(name, dims): def feed_data(name, data): - assert isinstance(data, numpy.array) + assert isinstance(data, numpy.ndarray) tensor = scope.find_var(name).get_tensor() tensor.set_dims(data.shape) - tensor.alloc_float(place) + if data.dtype == numpy.dtype('int32'): + tensor.alloc_float(place) + elif data.dtype == numpy.dtype('float32'): + tensor.alloc_int(place) + else: + raise ValueError("data type not supported") tensor.set(data, place) @@ -49,7 +54,11 @@ def grad_var_name(var_name): def sgd_optimizer(net, param_name, learning_rate=0.01): grad_name = grad_var_name(param_name) optimize_op = Operator( - "sgd", param=param_name, grad=grad_name, learning_rate=learning_rate) + "sgd", + param=param_name, + grad=grad_name, + param_out=param_name, + learning_rate=learning_rate) net.add_op(optimize_op) @@ -65,7 +74,7 @@ def init_param(param_name, dims): # fc_layer -def fc_layer(net, input, size, act="sigmoid", bias=True, param=None, name=None): +def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): """ Add a fc layer to net @@ -125,16 +134,64 @@ def cross_entropy_layer(net, input, label): return cost_name +def get_backward_net(forward_net): + net = core.Operator.backward(forward_net, set()) + for input in net.inputs()["all"]: + var = scope.new_var(input) + var.get_tensor() + for output in net.outputs()["all"]: + var = scope.new_var(output) + var.get_tensor() + return net + + +def print_inputs_outputs(op): + print("===============" + op.type() + "==============") + print("***inputs:***") + for input in op.inputs()["all"]: + print input, scope.find_var(input).get_tensor().get_dims() + print("***outputs:***") + for output in op.outputs()["all"]: + print output, scope.find_var(output).get_tensor().get_dims() + print("") + print("") + + images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) label = data_layer(name='label', dims=[BATCH_SIZE]) fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") cost = 
cross_entropy_layer(net=forward_network, input=fc, label=label) forward_network.complete_add_op(True) print(forward_network) -backward_net = core.Operator.backward(forward_network, set()) - +backward_net = get_backward_net(forward_network) print(backward_net) +optimize_net.complete_add_op(True) +print(optimize_net) PASS_NUM = 10 for pass_id in range(PASS_NUM): - print pass_id + print("===========forward==========") + feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) + feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) + forward_network.infer_shape(scope) + print_inputs_outputs(forward_network) + + print(numpy.array(scope.find_var("label").get_tensor())) + forward_network.run(scope, dev_ctx) + # print(numpy.array(scope.find_var("fc_0").get_tensor())) + + print("===========backward==========") + cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() + cost_grad.set_dims(cost_data.shape) + cost_grad.alloc_float(place) + cost_grad.set(cost_data, place) + + backward_net.infer_shape(scope) + print_inputs_outputs(backward_net) + + backward_net.run(scope, dev_ctx) + + print("===========optimize_net==========") + print_inputs_outputs(optimize_net) + optimize_net.run(scope, dev_ctx) From 53e71b44f41860e6482651b9e92dd1e6d3213c8a Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 22 Aug 2017 03:28:21 +0000 Subject: [PATCH 050/170] gather op bp passed --- paddle/operators/CMakeLists.txt | 2 -- paddle/operators/gather.h | 6 +++--- paddle/operators/gather_op.cc | 8 ++++---- paddle/operators/gather_op.h | 19 ++++++++++--------- .../v2/framework/tests/test_gather_op.py | 18 ++++++++++++++---- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 6849e39cb7..ba1362e8bf 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -44,8 +44,6 @@ endfunction() add_subdirectory(math) cc_test(gather_test SRCS gather_test.cc DEPS tensor) op_library(gather_op SRCS gather_op.cc gather_op.cu) -# DEPS op_registry) -# cc_test(gather_op_test SRCS gather_op_test.cc DEPS gather_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 3f299ea1a6..edac29f6db 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -27,13 +27,13 @@ namespace operators { // Implementation of CPU copy template -void CPUGather(const T* params, const int* indices, const int slice_size, +void CPUGather(const T* src, const int* indices, const int slice_size, const int index_size, T* output) { const size_t slice_bytes = slice_size * sizeof(T); for (int i = 0; i < index_size; ++i) { int index_ = indices[i]; - memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); + memcpy(output + i * slice_size, src + index_ * slice_size, slice_bytes); } } @@ -57,7 +57,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src, int index_size = index->dims()[0]; auto src_dims = src->dims(); - paddle::framework::DDim output_dims(src_dims); + framework::DDim output_dims(src_dims); output_dims[0] = index_size; // slice size diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 499def05a7..123bed296c 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -26,9 +26,9 @@ class GatherOp : public framework::OperatorWithKernel { void InferShape(const 
framework::InferShapeContext &ctx) const override { int batch_size = ctx.Input("Index")->dims()[0]; PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0"); - paddle::framework::DDim output_dims(ctx.Input("X")->dims()); + framework::DDim output_dims(ctx.Input("X")->dims()); output_dims[0] = batch_size; - ctx.Output("Y")->Resize(output_dims); + ctx.Output("Out")->Resize(output_dims); } }; @@ -51,11 +51,11 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The source input of gather op"); AddInput("Index", "The index input of gather op"); - AddOutput("Y", "The output of add op"); + AddOutput("Out", "The output of add op"); AddComment(R"DOC( Gather Operator by selecting from the first axis, -Y = X[Index] +Out = X[Index] )DOC"); } }; diff --git a/paddle/operators/gather_op.h b/paddle/operators/gather_op.h index 13e4c9b058..381854f301 100644 --- a/paddle/operators/gather_op.h +++ b/paddle/operators/gather_op.h @@ -26,10 +26,10 @@ using Tensor = framework::Tensor; template class GatherOpKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto X = ctx.Input("X"); - auto Index = ctx.Input("Index"); - auto Y = ctx.Output("Y"); + void Compute(const framework::ExecutionContext &ctx) const override { + auto *X = ctx.Input("X"); + auto *Index = ctx.Input("Index"); + auto *Y = ctx.Output("Out"); Y->mutable_data(ctx.GetPlace()); Gather(ctx.GetPlace(), X, Index, Y); @@ -39,12 +39,13 @@ class GatherOpKernel : public framework::OpKernel { template class GatherGradientOpKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto Index = ctx.Input("Index"); - auto dX = ctx.Output(framework::GradVarName("X")); - auto dY = ctx.Input(framework::GradVarName("Y")); + void Compute(const framework::ExecutionContext &ctx) const override { + auto *Index = ctx.Input("Index"); + auto *dX = ctx.Output(framework::GradVarName("X")); + auto *dO = ctx.Input(framework::GradVarName("Out")); - ScatterUpdate(ctx.GetPlace(), dY, Index, dX); + dX->mutable_data(ctx.GetPlace()); + ScatterUpdate(ctx.GetPlace(), dO, Index, dX); } }; diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py index 049054d07b..e868983042 100644 --- a/python/paddle/v2/framework/tests/test_gather_op.py +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -1,11 +1,10 @@ import unittest - +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op import numpy import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator -from op_test_util import OpTestMeta - class TestGatherOp(unittest.TestCase): __metaclass__ = OpTestMeta @@ -17,7 +16,18 @@ class TestGatherOp(unittest.TestCase): 'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32") } - self.outputs = {'Y': self.inputs['X'][self.inputs['Index']]} + self.outputs = {'Out': self.inputs['X'][self.inputs['Index']]} + + +class TestGatherGradOp(GradientChecker): + def test_gather_grad(self): + print 'creating op' + op = create_op("gather") + print 'creating op done' + xnp = numpy.random.random((10, 20)).astype("float32") + inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")} + print 'correct before check gradient' + self.check_grad(op, inputs, set("X"), "Out") if __name__ == "__main__": From dc5f0dbc324e0e15bef1753aeaed6700f5972cf0 Mon Sep 17 00:00:00 2001 From: 
zchen0211 Date: Tue, 22 Aug 2017 05:27:02 +0000 Subject: [PATCH 051/170] remove opregistry in gather function --- paddle/operators/gather.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index edac29f6db..92fb51ec17 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -18,7 +18,6 @@ limitations under the License. */ #include "paddle/framework/ddim.h" #include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" From 4eecd0c2d531f66e64eebff88a99488275143207 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 22 Aug 2017 14:18:16 +0800 Subject: [PATCH 052/170] use MKLDNNMatrix in fc backward --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 77 ++++++++++++------------- paddle/gserver/layers/MKLDNNLayer.h | 59 ++++++++++++++----- paddle/math/MKLDNNMatrix.h | 33 +++++++++-- 3 files changed, 110 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index fac0390eee..5463104469 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -158,10 +158,8 @@ void MKLDNNFcLayer::resetFwd() { hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, hasSpatial_ ? format::oihw : format::oi, engine_); - biasVal_ = hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); // change original output to mkldnn output @@ -193,46 +191,41 @@ void MKLDNNFcLayer::resetBwd() { return; } needResetBwd_ = false; - bool hasBias = biases_ && biases_->getWGrad(); - real* iData = getInputValue(0)->getData(); - real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; - real* oDiff = getOutputGrad()->getData(); - real* wDiff = weight_->getWGrad()->getData(); - real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; /// backward weight - // create memory desc for backward memory - memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) - : createMD({bs_, ic_}, format::nc); - memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) - : createMD({oc_, ic_}, format::oi); - memory::desc oMD = createMD({bs_, oc_}, format::nc); - memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x) - : createMD({}, format::format_undef); - - if (inVal_) { - // update data - inVal_->set_data_handle(iData); - } else { - LOG(FATAL) << "Should not be empty"; - // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); - } - - // create memory primitive desc and memory self - wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); - outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + CHECK(inVal_) << "Should have input value"; + const MatrixPtr& wgt = weight_->getWGrad(); + const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + const MatrixPtr& out = output_.grad; + + wgtGrad_ = MKLDNNMatrix::create( + wgt, wgtVal_->getDims(), wgtVal_->getFormat(), engine_); + biasGrad_ = + hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + outGrad_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + // change original output to mkldnn output + // TODO: right? 
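+  // Pointing output_.grad at the MKLDNN-backed matrix lets the backward
+  // primitives consume the output diff in place, mirroring what resetFwd()
+  // does for output_.value.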
+ output_.grad = std::dynamic_pointer_cast(outGrad_); + + // create memory primitive desc + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, + inVal_->getMD(), + wgtGrad_->getMD(), + outGrad_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL - ? fc_bwdWgt::desc(iMD, wMD, bMD, oMD) - : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::desc bwdWgtDesc = + hasBias ? fc_bwdWgt::desc(inVal_->getMD(), + wgtGrad_->getMD(), + biasGrad_->getMD(), + outGrad_->getMD()) + : fc_bwdWgt::desc( + inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - if (bDiff != NULL) { - biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + if (hasBias) { bwdWgt_.reset( new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); } else { @@ -242,13 +235,19 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - if (iDiff == NULL) { + const MatrixPtr& in = getInputGrad(0); + if (in == nullptr) { return; } - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::desc bwdDataDesc = + fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + + // TODO: check right, just from ingrad? + inGrad_ = + MKLDNNMatrix::create(in, inVal_->getDims(), inVal_->getFormat(), engine_); + CHECK(wgtVal_) << "Should have weight memory"; bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); pipelineBwd_.push_back(*bwdData_); @@ -264,7 +263,7 @@ void MKLDNNFcLayer::forward(PassType passType) { // update input data // since it might be changed if this is after data layer real* iData = getInputValue(0)->getData(); - inVal_->set_data_handle(iData); + inVal_->updateData(iData); // just submit forward pipeline stream_->submit(pipelineFwd_); @@ -288,7 +287,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) { // update diff real* oDiff = getOutputGrad()->getData(); - outGrad_->set_data_handle(oDiff); + outGrad_->updateData(oDiff); // just sumbmit backward pipeline stream_->submit(pipelineBwd_); diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index b44095befb..fbd62d9aaa 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -52,16 +52,15 @@ protected: std::vector pipelineFwd_; std::vector pipelineBwd_; - // TODO(TJ): change below memory as MKLDNNMatrixPtr type - // MKLDNNMatrixPtr ; + // MKLDNNMatrixPtr MKLDNNMatrixPtr inVal_; - std::shared_ptr inGrad_; + MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr outVal_; - std::shared_ptr outGrad_; + MKLDNNMatrixPtr outGrad_; MKLDNNMatrixPtr wgtVal_; - std::shared_ptr wgtGrad_; + MKLDNNMatrixPtr wgtGrad_; MKLDNNMatrixPtr biasVal_; - std::shared_ptr biasGrad_; + MKLDNNMatrixPtr biasGrad_; public: explicit MKLDNNLayer(const LayerConfig& config) @@ -84,17 +83,24 @@ public: virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
+ << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + if (useGpu_ == true) { + LOG(WARNING) << "Do not support GPU yet, will change to useGpu = false"; + useGpu_ = false; + } + + // set device id before Layer::init + setDevice(MKLDNN_DEVICE); + // change param device to MKLDNN device + setParamsDevice(MKLDNN_DEVICE, parameterMap); if (!Layer::init(layerMap, parameterMap)) { return false; } - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." - << "Please set WITH_MKLDNN=ON " - << "and set use_mkldnn=True"; stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); - - setDeviceID(MKLDNN_DEVICE); return true; } @@ -136,10 +142,33 @@ public: } protected: - void setDeviceID(int id) { - deviceId_ = id; - output_.deviceId = id; - // TODO: handle mkldnn device or add mkldnn device to other + /** + * Set deviceId of this layer. + */ + void setDevice(int id) { deviceId_ = id; } + + /** + * Set deviceId of the params used in this layer. + */ + void setParamsDevice(int id, const ParameterMap& parameterMap) { + for (auto& inputConfig : config_.inputs()) { + if (inputConfig.has_input_parameter_name()) { + ParameterPtr parameter; + std::string name = inputConfig.input_parameter_name(); + CHECK(mapGet(name, parameterMap, ¶meter)) + << "Cannot find input parameter " << name << " for layer " + << getName(); + parameter->setDevice(id); + } + } + if (config_.has_bias_parameter_name()) { + ParameterPtr parameter; + std::string name = config_.bias_parameter_name(); + CHECK(mapGet(name, parameterMap, ¶meter)) + << "Cannot find bias parameter " << name << " for layer " + << getName(); + parameter->setDevice(id); + } } }; diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 73eb50d2a0..54c0a1fdcb 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -44,6 +44,8 @@ public: set_data_handle(CpuMatrix::getData()); } + ~MKLDNNMatrix() {} + static MKLDNNMatrixPtr create( const MatrixPtr& m, mkldnn::memory::dims dims, @@ -52,21 +54,42 @@ public: mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); /** - * Get primitive descriptor + * Get primitive descriptor. */ mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } /** - * Get memory descriptor + * Get memory descriptor. */ mkldnn::memory::desc getMD() { return getPD().desc(); } /** - * Get format + * Get dims. */ - int getFormat() { return getMD().data.format; } + mkldnn::memory::dims getDims() { + mkldnn::memory::dims dst; + int* src = getMD().data.dims; + int ndims = getMD().data.ndims; + dst.resize(ndims); + for (int i = 0; i < ndims; ++i) { + dst[i] = src[i]; + } + return dst; + } - ~MKLDNNMatrix() {} + /** + * Get format. + */ + mkldnn::memory::format getFormat() { + return (mkldnn::memory::format)(getMD().data.format); + } + + /** + * Update the memory data handle. + * Caution: This will not check the buffer size of the data, + * it should be coverd by user. 
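+   * That is, the caller must guarantee the new buffer is large enough for
+   * the memory described by this primitive.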
+ */ + void updateData(void* data) { set_data_handle(data); } }; } // namespace paddle From 950cc60d2b2e6ab9c05f82df3f2d3f3179541209 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 22 Aug 2017 15:29:38 +0800 Subject: [PATCH 053/170] Add minus --- paddle/framework/CMakeLists.txt | 3 +- paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 1 + paddle/operators/minus_op.cc | 84 +++++++++++++++++++++++++++++++++ paddle/operators/minus_op.cu | 18 +++++++ paddle/operators/minus_op.h | 39 +++++++++++++++ 6 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/minus_op.cc create mode 100644 paddle/operators/minus_op.cu create mode 100644 paddle/operators/minus_op.h diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 5df14ae78d..c9cf45e9d7 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -56,5 +56,6 @@ cc_library(paddle_pybind SHARED uniform_random_op gaussian_random_op fill_zeros_like_op - scale_op) + scale_op + minus_op) endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 3aaf0de150..b4b7921d33 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -44,6 +44,7 @@ USE_OP(gaussian_random); USE_OP(uniform_random); USE_OP(scale); USE_OP_ITSELF(identity); +USE_OP(minus); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 0ba598823b..61f7a4070f 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -69,3 +69,4 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) +op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc new file mode 100644 index 0000000000..c660ab5d32 --- /dev/null +++ b/paddle/operators/minus_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/minus_op.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +class MinusOp : public framework::OperatorWithKernel { + public: + MinusOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto *left_tensor = ctx.Input("X"); + auto *right_tensor = ctx.Input("Y"); + + PADDLE_ENFORCE_EQ( + framework::product(left_tensor->dims()), + framework::product(right_tensor->dims()), + "Minus operator must take two tensor with same num of elements"); + ctx.Output("Out")->Resize(left_tensor->dims()); + } +}; + +class MinusOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MinusOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The left tensor of minus operator.").NotInGradient(); + AddInput("Y", "The right tensor of minus operator.").NotInGradient(); + AddOutput("Out", "The output tensor of minus operator.").NotInGradient(); + + AddComment(R"DOC(Minus Operator + +Equation: Out = X - Y +)DOC"); + } +}; +template +class MinusGradOp : public NetOp { + public: + MinusGradOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + auto out_grad = Input(framework::GradVarName("Out")); + auto x_grad = Output(framework::GradVarName("X")); + auto y_grad = Output(framework::GradVarName("Y")); + + // x_grad = out_grad + AddOp(framework::OpRegistry::CreateOp("identity", {{"X", {out_grad}}}, + {{"Out", {x_grad}}}, {})); + + framework::AttributeMap scale_attr; + scale_attr["scale"] = static_cast(-1); + AddOp(framework::OpRegistry::CreateOp("scale", {{"X", {out_grad}}}, + {{"Out", {y_grad}}}, scale_attr)); + } +}; + +} // namespace operators +} // namespace paddle + +USE_OP(scale); +USE_OP_ITSELF(identity); +namespace ops = paddle::operators; +REGISTER_OP(minus, ops::MinusOp, ops::MinusOpMaker, minus_grad, + ops::MinusGradOp); +REGISTER_OP_CPU_KERNEL(minus, + ops::MinusKernel); diff --git a/paddle/operators/minus_op.cu b/paddle/operators/minus_op.cu new file mode 100644 index 0000000000..a8375cc630 --- /dev/null +++ b/paddle/operators/minus_op.cu @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/minus_op.h" + +REGISTER_OP_GPU_KERNEL( + minus, paddle::operators::MinusKernel); diff --git a/paddle/operators/minus_op.h b/paddle/operators/minus_op.h new file mode 100644 index 0000000000..6310a4fd51 --- /dev/null +++ b/paddle/operators/minus_op.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class MinusKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* left_tensor = context.Input("X"); + auto* right_tensor = context.Input("Y"); + auto* out_tensor = context.Output("Out"); + + out_tensor->mutable_data(context.GetPlace()); + auto& dev = context.GetEigenDevice(); + framework::EigenVector::Flatten(*out_tensor).device(dev) = + framework::EigenVector::Flatten(*left_tensor) - + framework::EigenVector::Flatten(*right_tensor); + } +}; + +} // namespace operators +} // namespace paddle From 5a8fbb7d19e95f3be16bbee029e82e14f0a240df Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 00:56:34 -0700 Subject: [PATCH 054/170] add data --- python/paddle/v2/framework/tests/mnist.py | 26 +++++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index d0c56c457d..f75f196168 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -1,8 +1,9 @@ import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator import numpy +import paddle.v2 as paddle -BATCH_SIZE = 2 +BATCH_SIZE = 100 scope = core.Scope() place = core.CPUPlace() @@ -39,9 +40,9 @@ def feed_data(name, data): tensor = scope.find_var(name).get_tensor() tensor.set_dims(data.shape) if data.dtype == numpy.dtype('int32'): - tensor.alloc_float(place) - elif data.dtype == numpy.dtype('float32'): tensor.alloc_int(place) + elif data.dtype == numpy.dtype('float32'): + tensor.alloc_float(place) else: raise ValueError("data type not supported") tensor.set(data, place) @@ -168,20 +169,31 @@ print(backward_net) optimize_net.complete_add_op(True) print(optimize_net) -PASS_NUM = 10 +reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=BATCH_SIZE) + +PASS_NUM = 1000 for pass_id in range(PASS_NUM): print("===========forward==========") - feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) - feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) + # feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) + # feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) + data = reader().next() + image = numpy.array(map(lambda x: x[0], data)).astype("float32") + label = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data("pixel", image) + feed_data("label", label) forward_network.infer_shape(scope) print_inputs_outputs(forward_network) - print(numpy.array(scope.find_var("label").get_tensor())) + # print(numpy.array(scope.find_var("label").get_tensor())) forward_network.run(scope, dev_ctx) # print(numpy.array(scope.find_var("fc_0").get_tensor())) print("===========backward==========") 
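    # There is no mean/scale cost op in this graph, so the backward pass needs
    # its starting gradient seeded by hand: the tensor named
    # grad_var_name("cross_entropy_1") is resized to the per-sample cost shape
    # and filled below before backward_net runs (a later patch in this series
    # switches the seed to all ones, i.e. the gradient of a plain sum of costs).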
cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + print(cost_data.sum() / len(cost_data)) cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() cost_grad.set_dims(cost_data.shape) cost_grad.alloc_float(place) From 0f3b9e4112cbedd1b026f6cd09955d15f6207864 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 22 Aug 2017 15:36:43 +0800 Subject: [PATCH 055/170] lookup table op, cuda helper and set functor 1. finish lookup table CPU and GPU kernel 2. Add some cuda helper 3. Add some math funtor --- paddle/framework/pybind.cc | 1 + paddle/operators/CMakeLists.txt | 6 +- paddle/operators/functor/CMakeLists.txt | 5 + paddle/operators/functor/math_functor.cc | 42 +++++++ paddle/operators/functor/math_functor.cu | 42 +++++++ paddle/operators/functor/math_functor.h | 32 +++++ paddle/operators/lookup_table_op.cc | 71 +++++++++++ paddle/operators/lookup_table_op.cu | 116 ++++++++++++++++++ paddle/operators/lookup_table_op.h | 75 +++++++++++ paddle/platform/cuda_helper.h | 57 +++++++++ .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_lookup_table.py | 31 +++++ 12 files changed, 477 insertions(+), 2 deletions(-) create mode 100644 paddle/operators/functor/CMakeLists.txt create mode 100644 paddle/operators/functor/math_functor.cc create mode 100644 paddle/operators/functor/math_functor.cu create mode 100644 paddle/operators/functor/math_functor.h create mode 100644 paddle/operators/lookup_table_op.cc create mode 100644 paddle/operators/lookup_table_op.cu create mode 100644 paddle/operators/lookup_table_op.h create mode 100644 paddle/platform/cuda_helper.h create mode 100644 python/paddle/v2/framework/tests/test_lookup_table.py diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index f0114b9e49..68c5526bbb 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -42,6 +42,7 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); +USE_OP(lookup_table); namespace paddle { namespace framework { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a7c89787e4..1ca5010eae 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -42,6 +42,8 @@ function(op_library TARGET) endfunction() add_subdirectory(math) +add_subdirectory(functor) + cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) @@ -66,5 +68,5 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) -op_library(uniform_random_op - SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu DEPS math_functor) diff --git a/paddle/operators/functor/CMakeLists.txt b/paddle/operators/functor/CMakeLists.txt new file mode 100644 index 0000000000..d3b39e5fc2 --- /dev/null +++ b/paddle/operators/functor/CMakeLists.txt @@ -0,0 +1,5 @@ +if(WITH_GPU) + nv_library(math_functor SRCS math_functor.cc math_functor.cu DEPS device_context) +else() + cc_library(math_functor SRCS math_functor.cc DEPS device_context) +endif() diff --git a/paddle/operators/functor/math_functor.cc b/paddle/operators/functor/math_functor.cc new file mode 100644 index 0000000000..1f2767f171 --- /dev/null +++ b/paddle/operators/functor/math_functor.cc @@ -0,0 +1,42 @@ +/* 
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/functor/math_functor.h" +#include "paddle/framework/eigen.h" + +namespace paddle { +namespace operators { +namespace functor { + +template +struct Set { + void operator()(const T alpha, framework::Tensor* Y, + platform::DeviceContext* context) { + int N = product(Y->dims()); + T* YData = Y->mutable_data(context->GetPlace()); + if (alpha == static_cast(0)) { + memset(YData, 0, N * sizeof(T)); + } else { + framework::EigenVector::Flatten(*Y) + .setConstant(alpha); + } + } +}; + +template struct Set; +template struct Set; + +} // namespace functor +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/functor/math_functor.cu b/paddle/operators/functor/math_functor.cu new file mode 100644 index 0000000000..6dc828c60a --- /dev/null +++ b/paddle/operators/functor/math_functor.cu @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/functor/math_functor.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { +namespace functor { + +template +__global__ void SetKernel(const int N, const T alpha, T* Y) { + CUDA_1D_KERNEL_LOOP(i, N) { Y[i] = alpha; } +} + +template +struct Set { + void operator()(const T alpha, framework::Tensor* Y, + platform::DeviceContext* context) { + int N = product(Y->dims()); + T* YData = Y->mutable_data(context->GetPlace()); + SetKernel<<<(N + 512 - 1) / 512, 512>>>(N, alpha, YData); + } +}; + +template struct Set; +template struct Set; + +} // namespace functor +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/functor/math_functor.h b/paddle/operators/functor/math_functor.h new file mode 100644 index 0000000000..d5c7bd368f --- /dev/null +++ b/paddle/operators/functor/math_functor.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { +namespace functor { + +template +struct Set { + void operator()(const T alpha, paddle::framework::Tensor* Y, + paddle::platform::DeviceContext* context); +}; + +} // namespace functor +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc new file mode 100644 index 0000000000..5f70458a87 --- /dev/null +++ b/paddle/operators/lookup_table_op.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/lookup_table_op.h" + +namespace paddle { +namespace operators { + +class LookupTableOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &context) const override { + auto table_t = context.Input("W"); + auto ids_t = context.Input("Ids"); + auto output_t = context.Output("Out"); + + output_t->Resize({ids_t->dims()[0], table_t->dims()[1]}); + } +}; + +class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LookupTableOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("W", + "An input represents embedding tensors," + " which is a learnable parameter."); + AddInput("Ids", + "An input with type int32 or int64" + "contains the ids to be looked up in W.") + .NotInGradient(); + AddOutput("Out", "The lookup results, which have the same type with W."); + AddComment( + "This operator is used to perform lookups on the parameter W," + "then concatenated into a dense tensor."); + } +}; + +class LookupTableOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &context) const override { + context.Output(0)->Resize(context.Input(0)->dims()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(lookup_table, ops::LookupTableOp, ops::LookupTableOpMaker, + lookup_table_grad, ops::LookupTableOpGrad); + +REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel); +REGISTER_OP_CPU_KERNEL(lookup_table_grad, ops::LookupTableGradKernel); diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu new file mode 100644 index 0000000000..94b440e00e --- /dev/null +++ b/paddle/operators/lookup_table_op.cu @@ -0,0 +1,116 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/functor/math_functor.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +__global__ void LookupTable(T* output, const T* table, const uint32_t* ids, + const int N, const int K, const int D) { + int idx = threadIdx.x; + int idy = blockIdx.x + threadIdx.y * gridDimX; + + while (idy < K) { + int id = ids[idy]; + PADDLE_ASSERT(id >= 0); + PADDLE_ASSERT(id < N); + T* out = output + idy; + const T* tab = table + id; + for (int i = idx; i < D; i += blockDimX) { + out[i] = tab[i]; + } + idy += blockDimY * gridDimX; + } +} + +template +__global__ void LookupTableGradKernel(T* table, const T* output, + const uint32_t* ids, const int N, + const int K, const int D) { + int idx = threadIdx.x; + int idy = blockIdx.x + threadIdx.y * gridDimX; + + while (idy < K) { + int id = ids[idy]; + PADDLE_ASSERT(id >= 0); + PADDLE_ASSERT(id < N); + const T* out = output + idy; + T* tab = table + id; + for (int i = idx; i < D; i += blockDimX) { + paddle::platform::CudaAtomicAdd(tab + i, out[i]); + } + idy += blockDimY * gridDimX; + } +} + +template +class LookupTableCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto table_t = context.Input("W"); + auto ids_t = context.Input("Ids"); + auto output_t = context.Output("Out"); + + size_t N = table_t->dims()[0]; + size_t D = table_t->dims()[1]; + size_t K = product(ids_t->dims()); + auto ids = ids_t->data(); + auto table = table_t->data(); + auto output = output_t->mutable_data(context.GetPlace()); + + dim3 threads(128, 8); + dim3 grids(8, 1); + LookupTable<<>>(output, table, ids, N, K, D); + } +}; + +template +class LookupTableGrad : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ids_t = context.Input("Ids"); + auto d_output_t = context.Input(framework::GradVarName("Out")); + auto d_table_t = context.Output(framework::GradVarName("W")); + + int N = d_table_t->dims()[0]; + int D = d_table_t->dims()[1]; + int K = product(ids_t->dims()); + const uint32_t* ids = ids_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); + const T* d_output = d_output_t->data(); + + auto* device_context = + const_cast(context.device_context_); + functor::Set()(static_cast(0), d_table_t, + device_context); + dim3 threads(128, 8); + dim3 grids(8, 1); + LookupTableGradKernel<<>>(d_table, d_output, + ids, N, K, D); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(lookup_table, ops::LookupTableCUDAKernel); +REGISTER_OP_GPU_KERNEL(lookup_table_grad, ops::LookupTableGrad); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h new file mode 100644 index 0000000000..790ecab3c6 --- /dev/null +++ b/paddle/operators/lookup_table_op.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/functor/math_functor.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class LookupTableKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto table_t = context.Input("W"); // float tensor + auto ids_t = context.Input("Ids"); // int tensor + auto output_t = context.Output("Out"); // float tensor + + size_t N = table_t->dims()[0]; + size_t D = table_t->dims()[1]; + auto ids = ids_t->data(); + auto table = table_t->data(); + auto output = output_t->mutable_data(context.GetPlace()); + for (size_t i = 0; i < product(ids_t->dims()); ++i) { + PADDLE_ENFORCE_LT(ids[i], N); + PADDLE_ENFORCE_GE(ids[i], 0); + memcpy(output + i * D, table + ids[i] * D, D * sizeof(T)); + } + } +}; + +template +class LookupTableGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ids_t = context.Input("Ids"); + auto d_output_t = context.Input(framework::GradVarName("Out")); + auto d_table_t = context.Output(framework::GradVarName("W")); + + size_t N = d_table_t->dims()[0]; + size_t D = d_table_t->dims()[1]; + auto ids = ids_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); + const T* d_output = d_output_t->data(); + + auto* device_context = + const_cast(context.device_context_); + functor::Set()(static_cast(0), d_table_t, + device_context); + for (size_t i = 0; i < product(ids_t->dims()); ++i) { + PADDLE_ENFORCE_LT(ids[i], N); + PADDLE_ENFORCE_GE(ids[i], 0); + for (size_t j = 0; j < D; ++j) { + d_table[ids[i] * D + j] += d_output[i * D + j]; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h new file mode 100644 index 0000000000..4346291117 --- /dev/null +++ b/paddle/platform/cuda_helper.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include + +namespace paddle { +namespace platform { + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +#define CUDA_ATOMIC_WRAPPER(op, T) \ + __device__ __forceinline__ T CudaAtomic##op(T* address, const T val) + +#define USE_CUDA_ATOMIC(op, T) \ + CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); } + +// For atomicAdd. +USE_CUDA_ATOMIC(Add, float); + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +USE_CUDA_ATOMIC(Add, double); +#else +// Custom implementation of atomicAdd for double. +// This implementation is copied from CUDA manual. +CUDA_ATOMIC_WRAPPER(Add, double) { + unsigned long long int* address_as_ull = + reinterpret_cast(address); + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN + } while (assumed != old); + + return __longlong_as_double(old); +#endif +} + +} // namespace platform +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ce57a07130..65c02f2cfb 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -27,3 +27,4 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) +py_test(test_lookup_table SRCS test_lookup_table.py) diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py new file mode 100644 index 0000000000..071069768b --- /dev/null +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -0,0 +1,31 @@ +import unittest +import numpy as np +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op + + +class TestSigmoidOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'lookup_table' + table = np.random.random((17, 31)).astype('float32') + ids = np.random.randint(0, 17, 4) + self.inputs = {'W': table, 'Ids': ids} + self.outputs = {'Out': table[ids]} + + +class TestSigmoidGradOp(GradientChecker): + def test_grad(self): + op = create_op('lookup_table') + table = np.random.random((17, 31)).astype('float32') + ids = np.random.randint(0, 17, 4) + inputs = {'W': table, 'Ids': ids} + # compare gradients between cpu and gpu + self.compare_grad(op, inputs) + # check gradients + self.check_grad(op, inputs, set('W'), 'Out') + + +if __name__ == '__main__': + unittest.main() From c91e542ad3a5a1ecd1c6b825d7c9e89d3e7384b5 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 22 Aug 2017 17:18:14 +0800 Subject: [PATCH 056/170] fix compile for paddle_pybind. 
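
Without this extra link dependency the USE_OP(lookup_table) line added to
pybind.cc in the previous patch has nothing to resolve against, so the shared
paddle_pybind module fails to link.

For reference, roughly the Python-side usage this enables: a minimal sketch
that follows the scope/Operator pattern of mnist.py above. The device-context
helper and the single-op infer_shape/run calls are written from that pattern,
not copied from the repo, so treat those exact names as illustrative.

    import numpy as np
    import paddle.v2.framework.core as core
    from paddle.v2.framework.op import Operator

    scope = core.Scope()
    place = core.CPUPlace()
    ctx = core.DeviceContext.create(place)  # assumed helper, mirrors dev_ctx in mnist.py

    # Embedding table W: 17 rows of 31-dim float vectors.
    w = scope.new_var("W").get_tensor()
    w.set_dims([17, 31])
    w.alloc_float(place)
    w.set(np.random.random((17, 31)).astype("float32"), place)

    # Ids to look up, one row of W per id.
    ids = scope.new_var("Ids").get_tensor()
    ids.set_dims([4])
    ids.alloc_int(place)
    ids.set(np.random.randint(0, 17, 4).astype("int32"), place)

    scope.new_var("Out")
    op = Operator("lookup_table", W="W", Ids="Ids", Out="Out")
    op.infer_shape(scope)
    op.run(scope, ctx)

    # Gathered rows, shape (4, 31), equal to table[ids] in numpy terms.
    out = np.array(scope.find_var("Out").get_tensor())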
--- paddle/framework/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 68304c9fc8..325a6f7532 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -55,5 +55,6 @@ cc_library(paddle_pybind SHARED recurrent_op uniform_random_op gaussian_random_op + lookup_table_op fill_zeros_like_op) endif(WITH_PYTHON) From 9bc1a1a126dc60f06cd353ff72869416d50eb3af Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 22 Aug 2017 18:19:04 +0800 Subject: [PATCH 057/170] fix cuda_helper.h --- paddle/platform/cuda_helper.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index 4346291117..939c3713ad 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -34,8 +34,6 @@ USE_CUDA_ATOMIC(Add, float); #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 USE_CUDA_ATOMIC(Add, double); #else -// Custom implementation of atomicAdd for double. -// This implementation is copied from CUDA manual. CUDA_ATOMIC_WRAPPER(Add, double) { unsigned long long int* address_as_ull = reinterpret_cast(address); @@ -50,8 +48,8 @@ CUDA_ATOMIC_WRAPPER(Add, double) { } while (assumed != old); return __longlong_as_double(old); -#endif } +#endif } // namespace platform } // namespace paddle From 8f4ca2d12fffe38d5adff0ad74db6ba1bdc0d223 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 16 Aug 2017 15:34:02 +0800 Subject: [PATCH 058/170] add implementations. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 344 +++++++++++++++++- paddle/gserver/layers/CrossEntropyOverBeam.h | 98 +++++ .../tests/test_CrossEntropyOverBeamGrad.cpp | 166 ++++++--- 3 files changed, 549 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 88d80aa83a..09258fb305 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -16,6 +16,168 @@ limitations under the License. */ namespace paddle { +void CostForOneSequence::calValidExpandStep() { + validExpansionCount_ = 0; + goldAsExtraPath_ = true; + + for (size_t i = 0; i < beams_->expansionCount; ++i) { + real gold = static_cast(beams_->gold[i]); + if (i) { + real* start = beams_->candidateIds[i - 1]->getData(); + goldRowIds_[i] = std::count_if( + start, + start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1], + [](const real& val) { return val != -1.; }); + } else + goldRowIds_[i] = 0; + + real* start = + beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_; + real* findEnd = std::find(start, start + beamSize_, gold); + validExpansionCount_++; + + if (start + beamSize_ == findEnd) return; + goldColIds_[i] = findEnd - start; + } + + if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false; +} + +size_t CostForOneSequence::initLastExpansion() { + int beamId = validExpansionCount_ - 1; + const MatrixPtr candidates = beams_->candidateIds[beamId]; + size_t height = candidates->getHeight(); + + /* initialization the last expansion. */ + size_t pathCount = std::count_if(candidates->getData(), + candidates->getData() + height * beamSize_, + [](const real& val) { return val != -1; }); + /* + * if the gold sequence falls off the beam during search, + * add the gold sequence as the last path into all expanded paths. 
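+   * When that happens goldAsExtraPath_ is true: pathCount grows by one and
+   * goldIdsInFinalExpansion_ points at this appended row, so the softmax in
+   * globallyNormalizedScore() still has a well defined gold position.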
+ */ + if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; + + pathRowIdsInEachBeam_.clear(); + pathRowIdsInEachBeam_.resize(validExpansionCount_, + std::vector(pathCount, 0)); + parentIdsInBeam_.clear(); + parentIdsInBeam_.resize(pathCount, 0); + + if (goldAsExtraPath_) { + /* add gold sequence into the total expansion. */ + pathRowIdsInEachBeam_[beamId].back() = + beams_->gold[beamId] + + getSeqStartPos(beamId, goldRowIds_[validExpansionCount_ - 1]); + parentIdsInBeam_.back() = goldRowIds_[validExpansionCount_ - 1]; + } else { + size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId]; + goldIdsInFinalExpansion_ = + std::count_if(candidates->getData(), + candidates->getData() + goldOffset, + [](const real& val) { return val != -1.; }); + } + + /* + * TODO(caoying): fix this, store the indices of selected candidate + * paths into Argument.ids + */ + real* ids = candidates->getData(); + size_t curIdx = 0; + for (size_t i = 0; i < height; ++i) { + int basePos = getSeqStartPos(beamId, i); + for (size_t j = 0; j < beamSize_; ++j) { + int id = ids[i * beamSize_ + j]; + if (id == -1) continue; + pathRowIdsInEachBeam_[beamId][curIdx] = id + basePos; + parentIdsInBeam_[curIdx++] = i; + } + } + return pathCount; +} + +void CostForOneSequence::constructTotalExpansion() { + /* + * construct the entire expanded beam by begining with the last search + * in which gold falls off the beam. + */ + size_t totalPathCount = initLastExpansion(); + + for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) { + const MatrixPtr candidates = beams_->candidateIds[beamId]; + real* ids = candidates->getData(); + + int lastParentIdInBeam = -1; + int basePos = -1; + for (size_t i = 0; + i < (goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount); + ++i) { + int id = ids[parentIdsInBeam_[i]]; + int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot; + if (parentIdsInBeam_[i] != lastParentIdInBeam) + basePos = getSeqStartPos(beamId, parentRowId); + + pathRowIdsInEachBeam_[beamId][i] = id + basePos; + lastParentIdInBeam = parentIdsInBeam_[i]; + parentIdsInBeam_[i] = parentRowId; + + if (goldAsExtraPath_) + pathRowIdsInEachBeam_[beamId][totalPathCount - 1] = + beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]); + } + } +} + +real CostForOneSequence::globallyNormalizedScore() { + expandedPathScores_.resize(validExpansionCount_); + + Matrix::resizeOrCreate( + softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); + softmaxOut_->zero(); + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + Matrix::resizeOrCreate(expandedPathScores_[i], + pathRowIdsInEachBeam_[i].size(), + 1, + false, + false); + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + expandedPathScores_[i]->selectRows(*(beams_->scores[i]), *rowIds); + tmp->add(*expandedPathScores_[i]); + } + + softmaxOut_->softmax(*softmaxOut_); + return -std::log(softmaxOut_->getData()[goldIdsInFinalExpansion_]); +} + +real CostForOneSequence::forward() { + calValidExpandStep(); + constructTotalExpansion(); + return globallyNormalizedScore(); +} + +void CostForOneSequence::backward() { + softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.; + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + IVectorPtr rowIds = 
IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + /* + beams_->scoreGrad[i] has been intialized outside this class, this + class only keeps a pointer pointing to the original input gradients, + so here does not need to allocate or initalize the memory. + */ + tmp->addToRows(*beams_->scoreGrad[i], *rowIds); + } +} + REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); bool CrossEntropyOverBeam::init(const LayerMap& layerMap, @@ -24,13 +186,189 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; - setNeedSequenceInfo(false); + beamExpanCount_ = inputLayers_.size() / 3; + + candidateScores_.resize(beamExpanCount_); + candidateScoreGrad_.resize(beamExpanCount_); + candidateInBeam_.resize(beamExpanCount_); + goldSequence_.resize(beamExpanCount_); + gradToInputs_.resize(beamExpanCount_); + + setNeedSequenceInfo(false); return true; } -void CrossEntropyOverBeam::forward(PassType passType) {} +void CrossEntropyOverBeam::checkInputs() { + batchSize_ = 0; + for (size_t i = 0; i < beamExpanCount_; ++i) { + const Argument& scores = getInput(i * 3); + const Argument& selCandidates = getInput(i * 3 + 1); + const Argument& goldSeq = getInput(i * 3 + 2); + + if (i) { + CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, " + "should be a nested sequence"; + CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); + CHECK_EQ(scores.getNumSequences(), batchSize_); + CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); + } else { + CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence"; + batchSize_ = scores.getNumSequences(); + beamSize_ = getInputValue(i * 3 + 1)->getWidth(); + CHECK_EQ(batchSize_, selCandidates.getBatchSize()); + } + CHECK_EQ(1U, scores.value->getWidth()); + CHECK_EQ(batchSize_, goldSeq.getBatchSize()); + } +} + +void CrossEntropyOverBeam::copyInputsToCpu() { + auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) { + if (dynamic_cast(src.get())) { + Matrix::resizeOrCreate( + trg, src->getHeight(), src->getWidth(), false, false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) { + if (dynamic_cast(src.get())) { + IVector::resizeOrCreate(trg, src->getSize(), false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + beamSplitPos_.clear(); + beamSplitPos_.resize(batchSize_, std::vector(beamExpanCount_, 0)); + for (size_t i = 0; i < beamExpanCount_; ++i) { + copyValue(getInputValue(i * 3), candidateScores_[i]); + copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]); + copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]); + + if (i) { + ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions; + const int* seqStarts = seqInfo->getMutableData(false); + ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions; + const int* subSeqStarts = subSeqInfo->getMutableData(false); + + size_t seqId = 1; + for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1; + ++subSeqId) { + CHECK_LT(seqId, seqInfo->getSize()); + if (subSeqStarts[subSeqId] == seqStarts[seqId]) { + beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i]; + seqId++; + } + beamSplitPos_[seqId - 1][i]++; + } + } else { + for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1; + } + } +} + +void CrossEntropyOverBeam::splitBatchBeams() { + beamCosts_.resize(batchSize_); + 
beamPerSeq_.resize(batchSize_, beamExpanCount_); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + int* seqStarts = + getInput(i * 3).sequenceStartPositions->getMutableData(false); + + int* subSeqStarts = nullptr; + int maxLen = 0; + if (i) { + subSeqStarts = + getInput(i * 3).subSequenceStartPositions->getMutableData(false); + maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1; + } else + maxLen = getInput(i).sequenceStartPositions->getSize() - 1; + + for (size_t j = 0; j < batchSize_; ++j) { + beamPerSeq_[j].scores[i] = + Matrix::create(candidateScores_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + beamPerSeq_[j].scoreGrad[i] = + Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + + int offset = j ? beamSplitPos_[j - 1][i] : 0; + int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0); + CHECK_GE(maxLen, offset + height); + beamPerSeq_[j].seqInfo[i] = IVector::create( + (i ? subSeqStarts : seqStarts) + offset, height + 1, false); -void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {} + beamPerSeq_[j].candidateIds[i] = + Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_, + height, + beamSize_, + false, + false); + beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j]; + } + } +} + +void CrossEntropyOverBeam::resizeOutput() { + Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); + output_.value->zero(); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + MatrixPtr inGrad = getInputGrad(i * 3); + if (dynamic_cast(inGrad.get())) { + Matrix::resizeOrCreate(candidateScoreGrad_[i], + inGrad->getHeight(), + inGrad->getWidth(), + false, + false); + } else + candidateScoreGrad_[i] = std::move(inGrad); + candidateScoreGrad_[i]->zero(); + } +} + +void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) { + for (size_t i = 0; i < beamExpanCount_; ++i) { + if (dynamic_cast(getInputGrad(i * 3).get())) + getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]); + + if (i == copyCount - 1) break; + } +} + +void CrossEntropyOverBeam::forward(PassType passType) { + Layer::forward(passType); + + checkInputs(); + copyInputsToCpu(); + + resizeOutput(); + splitBatchBeams(); + + MatrixPtr outputValue = getOutputValue(); + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].setData( + std::move(std::make_shared(beamPerSeq_[i])), beamSize_); + outputValue->getData()[i] = beamCosts_[i].forward(); + } +} + +void CrossEntropyOverBeam::backward(const UpdateCallback& callback) { + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].backward(); + copyGradToGpu(beamCosts_[i].getValidExpansionCount()); + } +} } // namespace paddle diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h index 3106f9858b..96a5df7dfb 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.h +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -19,6 +19,79 @@ limitations under the License. 
*/ namespace paddle { +struct BeamExpansion { + // store the entire beam expansion for a single sequence + std::vector scores; + std::vector seqInfo; + + std::vector candidateIds; + std::vector gold; + + std::vector scoreGrad; + + size_t expansionCount; + + BeamExpansion(int n) { + expansionCount = n; + scores.resize(expansionCount); + seqInfo.resize(expansionCount); + candidateIds.resize(expansionCount); + scoreGrad.resize(expansionCount); + + gold.resize(expansionCount); + }; +}; +typedef std::shared_ptr BeamExpansionPtr; + +class CostForOneSequence { +public: + CostForOneSequence() + : beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {} + void setData(const BeamExpansionPtr bPtr, size_t beamSize) { + beams_ = bPtr; + beamSize_ = beamSize; + + expandedPathScores_.clear(); + expandedPathScores_.resize(beams_->expansionCount); + + goldRowIds_.clear(); + goldRowIds_.resize(beams_->expansionCount, 0); + goldColIds_.clear(); + goldColIds_.resize(beams_->expansionCount, -1); + } + size_t getValidExpansionCount() { return validExpansionCount_; } + + real forward(); + void backward(); + +private: + void calValidExpandStep(); + void constructTotalExpansion(); + size_t initLastExpansion(); + real globallyNormalizedScore(); + + int getSeqStartPos(size_t beamId, size_t rowId) { + CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId); + int* starts = beams_->seqInfo[beamId]->getData(); + return starts[rowId] - starts[0]; + }; + + size_t beamSize_; + size_t validExpansionCount_; + bool goldAsExtraPath_; + std::vector goldRowIds_; + std::vector goldColIds_; + + BeamExpansionPtr beams_; + std::vector> pathRowIdsInEachBeam_; + std::vector parentIdsInBeam_; + size_t goldIdsInFinalExpansion_; + + std::vector expandedPathScores_; + + MatrixPtr softmaxOut_; +}; + class CrossEntropyOverBeam : public Layer { public: explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} @@ -26,6 +99,31 @@ public: const ParameterMap& parameterMap) override; void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; + +private: + void checkInputs(); + void copyInputsToCpu(); + void resizeOutput(); + void copyGradToGpu(size_t copyCount); + void splitBatchBeams(); + + size_t beamExpanCount_; + size_t batchSize_; + size_t beamSize_; + + // Currently, this layer only works on CPU, if its inputs is on GPU, + // copy them to CPU memory. + std::vector candidateScores_; + std::vector candidateScoreGrad_; + std::vector candidateInBeam_; + std::vector gradToInputs_; + std::vector goldSequence_; + std::vector> beamSplitPos_; + + // split entire bath of beams into beam per sequnence. + std::vector beamPerSeq_; + // beamCosts_ is used to propagate error in one sequence. 
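+  // Illustrative sketch of what each entry computes: if the gold path has
+  // accumulated score s_g and the expanded paths have scores s_1..s_P, then
+  // forward() returns -log(exp(s_g) / sum_i exp(s_i)), and backward()
+  // subtracts 1 from the gold entry of that softmax before scattering the
+  // result back onto the input score gradients with addToRows.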
+ std::vector beamCosts_; }; } // namespace paddle diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index a5f06c15dc..506a4281df 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -28,9 +28,17 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -const size_t MAX_SEQ_NUM = 10; -const size_t MAX_SEQ_LEN = 27; -const size_t MAX_BEAM_SIZE = 10; +// const size_t MAX_SEQ_NUM = 5; +// const size_t MAX_SEQ_LEN = 10; +// const size_t MAX_BEAM_SIZE = 3; + +const size_t MAX_SEQ_NUM = 23; +const size_t MAX_SEQ_LEN = 50; +const size_t MAX_BEAM_SIZE = 27; + +// const size_t SEED = 1503391792; +// const size_t SEED = 1; +const size_t SEED = (size_t)(time(NULL)); struct SingleBeamExpansion { vector seqStartPos; @@ -43,11 +51,30 @@ struct SingleBeamExpansion { vector groundTruth; vector inBeam; vector rowIdxInBeam; + vector colIdxInBeam; + + void resetGroundTruth(size_t n) { + groundTruth.clear(); + groundTruth.resize(n, -1); + + inBeam.clear(); + inBeam.resize(n, 0); + + rowIdxInBeam.clear(); + rowIdxInBeam.resize(n, -1); + + colIdxInBeam.clear(); + colIdxInBeam.resize(n, -1); + } }; +inline float randFloat() { + return static_cast(rand()) / static_cast(RAND_MAX); +} + void genRand(real* numbers, size_t n) { default_random_engine generator; - uniform_real_distribution distribution(0.0, 1.0); + uniform_real_distribution distribution(0.0, 1.0); for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); } @@ -72,8 +99,7 @@ void genCandidateScores(bool hasSubseq, vector& subSeqStartPos = curBeam.subSeqStartPos; subSeqStartPos.resize(1, 0); - srand((size_t)(time(NULL))); - // srand(1); + srand(SEED); if (prevBeam.selectedIndices.size()) { if (prevBeam.subSeqStartPos.size() > 1) { int seqIdx = 1; @@ -81,9 +107,8 @@ void genCandidateScores(bool hasSubseq, for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { for (size_t j = 0; j < beamSize; ++j) { if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; - for (size_t k = 0; k < beamSize; ++k) - subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + - subSeqStartPos.back()); + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); } if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { seqStartPos.push_back(subSeqStartPos.back()); @@ -91,7 +116,6 @@ void genCandidateScores(bool hasSubseq, } } } else { - // samples in previous beam are sequences. for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { if (i && i % beamSize == 0) { seqStartPos.push_back(subSeqStartPos.back()); @@ -141,27 +165,41 @@ void genSelectedIndices(size_t beamSize, void genGroundTruth(vector& beamExpansions, size_t beamSize) { - size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; + SingleBeamExpansion& beam = beamExpansions[1]; + size_t seqNum = beam.seqStartPos.size() - 1; for (size_t i = 2; i < beamExpansions.size(); ++i) - CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); + CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1); - // srand(1); - srand((size_t)(time(NULL))); + srand(SEED); // initialize the first beam. 
- SingleBeamExpansion& beam = beamExpansions[1]; - beam.groundTruth.resize(seqNum, 0); - beam.inBeam.resize(seqNum, 0); - beam.rowIdxInBeam.resize(seqNum, -1); - - auto begPos = beam.selectedIndices.begin(); + beam.resetGroundTruth(seqNum); for (size_t i = 0; i < seqNum; ++i) { - int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; - int label = rand() % seqLen; - auto endPos = begPos + beamSize; - beam.groundTruth[i] = label; - if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1; - begPos = endPos; + if (randFloat() > 0.5) { + // force the randomly generated label falls in the beam by chance 0.5. + // otherwise, when sequence length is relatively long and beam size is + // relatively small, the gold sequences falls off the beam at in + // the first search. + real* begPos = beam.selectedIndices.data() + i * beamSize; + beam.colIdxInBeam[i] = + rand() % count_if(begPos, begPos + beamSize, [](const real& val) { + return val != -1.; + }); + beam.groundTruth[i] = + beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]]; + beam.inBeam[i] = 1; + } else { + int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]); + beam.groundTruth[i] = label; + + real* begPos = beam.selectedIndices.data() + i * beamSize; + real* endPos = begPos + beamSize; + real* lblPos = find(begPos, endPos, real(label)); + if (lblPos != endPos) { + beam.inBeam[i] = 1; + beam.colIdxInBeam[i] = lblPos - begPos; + } + } beam.rowIdxInBeam[i] = i; } @@ -169,22 +207,33 @@ void genGroundTruth(vector& beamExpansions, for (size_t i = 2; i < beamExpansions.size(); ++i) { SingleBeamExpansion& curBeam = beamExpansions[i]; SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; - - curBeam.groundTruth.resize(seqNum, 0); - curBeam.inBeam.resize(seqNum, 0); - curBeam.rowIdxInBeam.resize(seqNum, -1); + curBeam.resetGroundTruth(seqNum); // iterate over each sequence for (size_t j = 0; j < seqNum; ++j) { - if (prevBeam.inBeam[j]) { - // gold sequence falls in the beam in previous search. - - auto begPos = prevBeam.selectedIndices.begin(); - auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize; - size_t totalExpansion = - prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.); - curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j]; - + if (!prevBeam.inBeam[j]) continue; + + // gold sequence falls in the beam in previous search. + real* begPos = prevBeam.selectedIndices.data(); + int offset = + prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j]; + curBeam.rowIdxInBeam[j] = count_if( + begPos, begPos + offset, [](const real& val) { return val != -1.; }); + + if (randFloat() > 0.5) { + // force the randomly generated label falls in the beam by chance 0.5. + // otherwise, when sequence length is relatively long and beam size is + // relatively small, the gold sequences falls off the beam at in + // the first search. 
+ real* start = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + int n = rand() % count_if(start, start + beamSize, [](const real& val) { + return val != -1.; + }); + curBeam.colIdxInBeam[j] = n; + curBeam.groundTruth[j] = *(start + n); + curBeam.inBeam[j] = 1; + } else { CHECK_LE(curBeam.rowIdxInBeam[j] + 1, curBeam.subSeqStartPos.size() - 1); int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; @@ -193,16 +242,14 @@ void genGroundTruth(vector& beamExpansions, int label = rand() % (end - start); curBeam.groundTruth[j] = label; - auto findBeg = curBeam.selectedIndices.begin() + - curBeam.rowIdxInBeam[j] * beamSize; - auto findEnd = findBeg + beamSize; - if (find(findBeg, findEnd, real(label)) != findEnd) + real* findBeg = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + real* lblPos = + find(findBeg, findBeg + beamSize, static_cast(label)); + if (lblPos != (findBeg + beamSize)) { curBeam.inBeam[j] = 1; - } else { - // in previous search, gold sequence has fallen off the beam, - // the beam search stops, here use -1 as a dummy label. - // It will not used in calculation the cost. - beamExpansions[i].groundTruth[j] = -1; + curBeam.colIdxInBeam[j] = lblPos - findBeg; + } } } } @@ -230,15 +277,12 @@ void genRandomBeamExpansion(size_t expansionCount, genGroundTruth(beamExpansions, beamSize); } -void testCrossEntropyOverBeam(bool useGpu) { +void testCrossEntropyOverBeam(bool useGpu, + size_t beamSize, + vector& beams) { TestConfig config; config.layerConfig.set_type("cross_entropy_over_beam"); - const size_t expansionCount = 3; - const size_t beamSize = MAX_BEAM_SIZE; - vector beams; - genRandomBeamExpansion(expansionCount, beamSize, beams); - size_t seqNum = 0; for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; @@ -291,7 +335,17 @@ void testCrossEntropyOverBeam(bool useGpu) { } TEST(Layer, CrossEntropyOverBeam) { - for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu); + LOG(INFO) << "SEED = " << SEED; + const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE; + LOG(INFO) << "beamSize = " << beamSize; + + // TODO(caoying): test with more beam expansions. + const size_t expansionCount = 3; + vector beams; + genRandomBeamExpansion(expansionCount, beamSize, beams); + + for (bool useGpu : {false, true}) + testCrossEntropyOverBeam(useGpu, beamSize, beams); } int main(int argc, char** argv) { @@ -299,7 +353,7 @@ int main(int argc, char** argv) { hl_start(); hl_init(FLAGS_gpu_id); FLAGS_thread_local_rand_use_global_seed = true; - srand(1); + srand(SEED); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } From 3bf440023abd5801f21b98d027623b6cb3959a0b Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 22 Aug 2017 21:03:46 +0800 Subject: [PATCH 059/170] follow comments. --- paddle/gserver/gradientmachines/RecurrentGradientMachine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index cc0eda9f13..c16fae6d17 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -503,9 +503,9 @@ private: * sequence in Matrix stored the entire beam search batch's forward pass * results. 
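   * The selected row ids recorded in machineIdVec are translated here into
   * row offsets of the concatenated forward-pass outputs, which the later
   * concat() call uses to gather the frames of every finally kept path.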
* - * @param isSeq: a flag indicating whetehr the layer to be output of the + * @param isSeq: a flag indicating whether the layer to be output of the * RecurrentGradientMachine is a sequence or not - * @param outArgs: all of the the returned Arguments of the forward pass + * @param outArgs: all of the returned Arguments of the forward pass * during the generation process. */ void createDataOutlinkSelRowsInfo(bool isSeq, std::vector& outArgs); From a8d072c769b940d087006fa68ffcf462aa8579b8 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 23 Aug 2017 00:12:58 +0800 Subject: [PATCH 060/170] fix bug. --- paddle/operators/lookup_table_op.cc | 7 ++-- paddle/operators/lookup_table_op.cu | 32 +++++++++---------- paddle/operators/lookup_table_op.h | 6 ++-- .../v2/framework/tests/test_lookup_table.py | 6 ++-- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 5f70458a87..94d40890a7 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -41,8 +41,7 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { " which is a learnable parameter."); AddInput("Ids", "An input with type int32 or int64" - "contains the ids to be looked up in W.") - .NotInGradient(); + "contains the ids to be looked up in W."); AddOutput("Out", "The lookup results, which have the same type with W."); AddComment( "This operator is used to perform lookups on the parameter W," @@ -56,7 +55,9 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &context) const override { - context.Output(0)->Resize(context.Input(0)->dims()); + auto table = context.Input("W"); + auto d_table = context.Output(framework::GradVarName("W")); + d_table->Resize(table->dims()); } }; diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 94b440e00e..99678ef681 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -23,7 +23,7 @@ namespace operators { using Tensor = framework::Tensor; template -__global__ void LookupTable(T* output, const T* table, const uint32_t* ids, +__global__ void LookupTable(T* output, const T* table, const int32_t* ids, const int N, const int K, const int D) { int idx = threadIdx.x; int idy = blockIdx.x + threadIdx.y * gridDimX; @@ -32,8 +32,8 @@ __global__ void LookupTable(T* output, const T* table, const uint32_t* ids, int id = ids[idy]; PADDLE_ASSERT(id >= 0); PADDLE_ASSERT(id < N); - T* out = output + idy; - const T* tab = table + id; + T* out = output + idy * D; + const T* tab = table + id * D; for (int i = idx; i < D; i += blockDimX) { out[i] = tab[i]; } @@ -42,9 +42,8 @@ __global__ void LookupTable(T* output, const T* table, const uint32_t* ids, } template -__global__ void LookupTableGradKernel(T* table, const T* output, - const uint32_t* ids, const int N, - const int K, const int D) { +__global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, + const int N, const int K, const int D) { int idx = threadIdx.x; int idy = blockIdx.x + threadIdx.y * gridDimX; @@ -52,10 +51,10 @@ __global__ void LookupTableGradKernel(T* table, const T* output, int id = ids[idy]; PADDLE_ASSERT(id >= 0); PADDLE_ASSERT(id < N); - const T* out = output + idy; - T* tab = table + id; + const T* out = output + idy * D; + T* tab = table + id * D; for (int i = idx; i < D; i += blockDimX) { - paddle::platform::CudaAtomicAdd(tab + i, 
out[i]); + paddle::platform::CudaAtomicAdd(&tab[i], out[i]); } idy += blockDimY * gridDimX; } @@ -72,7 +71,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { size_t N = table_t->dims()[0]; size_t D = table_t->dims()[1]; size_t K = product(ids_t->dims()); - auto ids = ids_t->data(); + auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); @@ -83,7 +82,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { }; template -class LookupTableGrad : public framework::OpKernel { +class LookupTableGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto ids_t = context.Input("Ids"); @@ -93,9 +92,9 @@ class LookupTableGrad : public framework::OpKernel { int N = d_table_t->dims()[0]; int D = d_table_t->dims()[1]; int K = product(ids_t->dims()); - const uint32_t* ids = ids_t->data(); - T* d_table = d_table_t->mutable_data(context.GetPlace()); + const int32_t* ids = ids_t->data(); const T* d_output = d_output_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); auto* device_context = const_cast(context.device_context_); @@ -103,8 +102,8 @@ class LookupTableGrad : public framework::OpKernel { device_context); dim3 threads(128, 8); dim3 grids(8, 1); - LookupTableGradKernel<<>>(d_table, d_output, - ids, N, K, D); + LookupTableGrad<<>>(d_table, d_output, ids, N, + K, D); } }; @@ -113,4 +112,5 @@ class LookupTableGrad : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(lookup_table, ops::LookupTableCUDAKernel); -REGISTER_OP_GPU_KERNEL(lookup_table_grad, ops::LookupTableGrad); +REGISTER_OP_GPU_KERNEL(lookup_table_grad, + ops::LookupTableGradCUDAKernel); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 790ecab3c6..9254e03a1b 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -32,7 +32,7 @@ class LookupTableKernel : public framework::OpKernel { size_t N = table_t->dims()[0]; size_t D = table_t->dims()[1]; - auto ids = ids_t->data(); + auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); for (size_t i = 0; i < product(ids_t->dims()); ++i) { @@ -53,9 +53,9 @@ class LookupTableGradKernel : public framework::OpKernel { size_t N = d_table_t->dims()[0]; size_t D = d_table_t->dims()[1]; - auto ids = ids_t->data(); - T* d_table = d_table_t->mutable_data(context.GetPlace()); + auto ids = ids_t->data(); const T* d_output = d_output_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); auto* device_context = const_cast(context.device_context_); diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py index 071069768b..3056bf53e3 100644 --- a/python/paddle/v2/framework/tests/test_lookup_table.py +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -10,7 +10,7 @@ class TestSigmoidOp(unittest.TestCase): def setUp(self): self.type = 'lookup_table' table = np.random.random((17, 31)).astype('float32') - ids = np.random.randint(0, 17, 4) + ids = np.random.randint(0, 17, 4).astype('int32') self.inputs = {'W': table, 'Ids': ids} self.outputs = {'Out': table[ids]} @@ -19,10 +19,8 @@ class TestSigmoidGradOp(GradientChecker): def test_grad(self): op = create_op('lookup_table') table = np.random.random((17, 31)).astype('float32') - ids = np.random.randint(0, 17, 4) + ids = np.random.randint(0, 
17, 4).astype('int32') inputs = {'W': table, 'Ids': ids} - # compare gradients between cpu and gpu - self.compare_grad(op, inputs) # check gradients self.check_grad(op, inputs, set('W'), 'Out') From 51792022c9f7963321d77d7dac4143e566af9fdc Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 12:54:44 -0700 Subject: [PATCH 061/170] refine code and add debug info --- python/paddle/v2/framework/tests/mnist.py | 47 +++++++++++------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index f75f196168..6a3ed0dce0 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -52,7 +52,7 @@ def grad_var_name(var_name): return var_name + "@GRAD" -def sgd_optimizer(net, param_name, learning_rate=0.01): +def sgd_optimizer(net, param_name, learning_rate=0.001): grad_name = grad_var_name(param_name) optimize_op = Operator( "sgd", @@ -65,7 +65,6 @@ def sgd_optimizer(net, param_name, learning_rate=0.01): # should use operator and add these to the init_network def init_param(param_name, dims): - print param_name var = scope.new_var(param_name) tensor = var.get_tensor() tensor.set_dims(dims) @@ -158,17 +157,34 @@ def print_inputs_outputs(op): print("") +def set_cost(): + cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + # print(cost_data) + print(cost_data.sum() / len(cost_data)) + + cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() + cost_grad.set_dims(cost_data.shape) + cost_grad.alloc_float(place) + cost_grad.set(cost_data, place) + + images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) label = data_layer(name='label', dims=[BATCH_SIZE]) fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") cost = cross_entropy_layer(net=forward_network, input=fc, label=label) + forward_network.complete_add_op(True) -print(forward_network) backward_net = get_backward_net(forward_network) -print(backward_net) optimize_net.complete_add_op(True) + +print(forward_network) +print(backward_net) print(optimize_net) +print_inputs_outputs(forward_network) +print_inputs_outputs(backward_net) +print_inputs_outputs(optimize_net) + reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), @@ -176,34 +192,17 @@ reader = paddle.batch( PASS_NUM = 1000 for pass_id in range(PASS_NUM): - print("===========forward==========") - # feed_data("pixel", numpy.random.random((BATCH_SIZE, 784)).astype('float32')) - # feed_data("label", numpy.ones(BATCH_SIZE).astype("int32")) data = reader().next() + image = numpy.array(map(lambda x: x[0], data)).astype("float32") label = numpy.array(map(lambda x: x[1], data)).astype("int32") feed_data("pixel", image) feed_data("label", label) - forward_network.infer_shape(scope) - print_inputs_outputs(forward_network) - # print(numpy.array(scope.find_var("label").get_tensor())) + forward_network.infer_shape(scope) forward_network.run(scope, dev_ctx) - # print(numpy.array(scope.find_var("fc_0").get_tensor())) - - print("===========backward==========") - cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) - print(cost_data.sum() / len(cost_data)) - cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() - cost_grad.set_dims(cost_data.shape) - cost_grad.alloc_float(place) - cost_grad.set(cost_data, place) - + set_cost() backward_net.infer_shape(scope) - print_inputs_outputs(backward_net) - backward_net.run(scope, 
dev_ctx) - print("===========optimize_net==========") - print_inputs_outputs(optimize_net) optimize_net.run(scope, dev_ctx) From d3c65a64dc4ab98af10498cb2eb9327ef1697e5a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 20:21:23 -0700 Subject: [PATCH 062/170] fix data reader --- python/paddle/v2/framework/tests/mnist.py | 29 ++++++++++++----------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 6a3ed0dce0..1d40fd9a97 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -52,7 +52,7 @@ def grad_var_name(var_name): return var_name + "@GRAD" -def sgd_optimizer(net, param_name, learning_rate=0.001): +def sgd_optimizer(net, param_name, learning_rate=0.01): grad_name = grad_var_name(param_name) optimize_op = Operator( "sgd", @@ -159,13 +159,13 @@ def print_inputs_outputs(op): def set_cost(): cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) - # print(cost_data) print(cost_data.sum() / len(cost_data)) cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() + cost_grad.set_dims(cost_data.shape) cost_grad.alloc_float(place) - cost_grad.set(cost_data, place) + cost_grad.set(numpy.ones(cost_data.shape).astype("float32"), place) images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) @@ -192,17 +192,18 @@ reader = paddle.batch( PASS_NUM = 1000 for pass_id in range(PASS_NUM): - data = reader().next() - image = numpy.array(map(lambda x: x[0], data)).astype("float32") - label = numpy.array(map(lambda x: x[1], data)).astype("int32") - feed_data("pixel", image) - feed_data("label", label) + print("pass[" + str(pass_id) + "]") + for data in reader(): + image = numpy.array(map(lambda x: x[0], data)).astype("float32") + label = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data("pixel", image) + feed_data("label", label) - forward_network.infer_shape(scope) - forward_network.run(scope, dev_ctx) - set_cost() - backward_net.infer_shape(scope) - backward_net.run(scope, dev_ctx) + forward_network.infer_shape(scope) + forward_network.run(scope, dev_ctx) + set_cost() + backward_net.infer_shape(scope) + backward_net.run(scope, dev_ctx) - optimize_net.run(scope, dev_ctx) + optimize_net.run(scope, dev_ctx) From a13798e8f7764239c151864894afc6a543e6c190 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 22 Aug 2017 20:41:31 -0700 Subject: [PATCH 063/170] rename add_op to append_op --- python/paddle/v2/framework/tests/mnist.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 1d40fd9a97..32349b8d4d 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -60,7 +60,7 @@ def sgd_optimizer(net, param_name, learning_rate=0.01): grad=grad_name, param_out=param_name, learning_rate=learning_rate) - net.add_op(optimize_op) + net.append_op(optimize_op) # should use operator and add these to the init_network @@ -102,7 +102,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): pre_activation = name + ".mul.out" scope.new_var(pre_activation) mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation) - net.add_op(mul_op) + net.append_op(mul_op) # create bias variable if needed if bias: @@ -112,13 +112,13 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): 
net=optimize_net, param_name=bias_name, learning_rate=0.01) bias_out = name + ".rowwise_add.out" scope.new_var(bias_out) - rowwise_add_op = Operator( + rowwise_append_op = Operator( "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out) - net.add_op(rowwise_add_op) + net.append_op(rowwise_append_op) pre_activation = bias_out activation_op = Operator(act, X=pre_activation, Y=name) - net.add_op(activation_op) + net.append_op(activation_op) scope.new_var(name) net.infer_shape(scope) return name @@ -128,7 +128,7 @@ def cross_entropy_layer(net, input, label): cost_name = 'cross_entropy_%d' % uniq_id() cross_entropy_op = Operator( "onehot_cross_entropy", X=input, label=label, Y=cost_name) - net.add_op(cross_entropy_op) + net.append_op(cross_entropy_op) scope.new_var(cost_name) net.infer_shape(scope) return cost_name From d8cd67dd1e229a27180d3628dc9485734546aba4 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 23 Aug 2017 12:26:46 +0800 Subject: [PATCH 064/170] Make cudnn convolution layer and projection support for dilation. --- paddle/cuda/include/hl_cuda_cudnn.h | 11 +- paddle/cuda/src/hl_cuda_cudnn.cc | 123 ++++++++++-------- paddle/gserver/layers/ConvBaseLayer.cpp | 16 ++- paddle/gserver/layers/ConvBaseLayer.h | 4 + paddle/gserver/layers/ConvBaseOperator.cpp | 3 +- paddle/gserver/layers/ConvBaseProjection.cpp | 20 ++- paddle/gserver/layers/ConvBaseProjection.h | 1 + paddle/gserver/layers/ConvProjection.cpp | 4 +- paddle/gserver/tests/test_LayerGrad.cpp | 40 ++++-- proto/ModelConfig.proto | 3 + python/paddle/trainer/config_parser.py | 4 + .../paddle/trainer_config_helpers/layers.py | 19 +++ .../tests/configs/img_layers.py | 1 + 13 files changed, 171 insertions(+), 78 deletions(-) diff --git a/paddle/cuda/include/hl_cuda_cudnn.h b/paddle/cuda/include/hl_cuda_cudnn.h index db18e4912b..3f68c62de6 100644 --- a/paddle/cuda/include/hl_cuda_cudnn.h +++ b/paddle/cuda/include/hl_cuda_cudnn.h @@ -214,7 +214,8 @@ extern void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes); + size_t* bwdFilterLimitBytes, + bool useDilation); /** * @brief destroy filter descriptor. @@ -242,7 +243,9 @@ extern void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width); + int stride_width, + int dilation_h = 1, + int dilation_w = 1); /** * @brief reset convolution descriptor. @@ -262,7 +265,9 @@ extern void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width); + int stride_width, + int dilation_h = 1, + int dilation_w = 1); /** * @brief destroy convolution descriptor. 
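Aside (illustrative only, not part of these patches): dilation simply spreads the filter taps apart, so a K x K kernel with dilation d covers a window of (K - 1) * d + 1 pixels, and the new default arguments of 1 above reproduce the old dense behaviour. A minimal numpy sketch of that equivalence, with made-up names:

    import numpy as np

    def zero_stuffed(kernel, d):
        # place the taps of `kernel` d pixels apart; effective extent is (K - 1) * d + 1
        k = kernel.shape[0]
        out = np.zeros(((k - 1) * d + 1, (k - 1) * d + 1), dtype=kernel.dtype)
        out[::d, ::d] = kernel
        return out

    kernel = np.ones((2, 2), dtype=np.float32)
    print(zero_stuffed(kernel, 2).shape)   # (3, 3): a 2x2 filter with dilation 2 spans 3 pixels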
diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 78642a1744..f55fa523e1 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -201,7 +201,8 @@ void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes) { + size_t* bwdFilterLimitBytes, + bool useDilation) { #if CUDNN_VERSION >= 4000 CHECK_NOTNULL(input); @@ -213,21 +214,60 @@ void hl_conv_workspace(hl_tensor_descriptor input, size_t memoryLimitBytes = (1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb; + // For dilation + int algo = 0; + // cudnn convolution forward configuration cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input); cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output); cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter); cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); + // cudnn convolution backward data configuration + cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter); + cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output); + cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input); + cudnnConvolutionDescriptor_t bwd_data_conv_desc = + GET_CONVOLUTION_DESCRIPTOR(conv); + // cudnn convolution backward filter configuration + cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input); + cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output); + cudnnConvolutionDescriptor_t bwd_filter_conv_desc = + GET_CONVOLUTION_DESCRIPTOR(conv); + cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter); - CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( - t_resource.cudnn_handle, - fwd_src_desc, - fwd_filter_desc, - fwd_conv_desc, - fwd_dest_desc, - CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convFwdAlgo))); + if (useDilation) { + convFwdAlgo = &algo; + convBwdDataAlgo = &algo; + convBwdFilterAlgo = &algo; + } else { + CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( + t_resource.cudnn_handle, + fwd_src_desc, + fwd_filter_desc, + fwd_conv_desc, + fwd_dest_desc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convFwdAlgo))); + CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm( + t_resource.cudnn_handle, + bwd_data_filter_desc, + bwd_data_diff_desc, + bwd_data_conv_desc, + bwd_data_grad_desc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convBwdDataAlgo))); + CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm( + t_resource.cudnn_handle, + bwd_filter_src_desc, + bwd_filter_diff_desc, + bwd_filter_conv_desc, + bwd_filter_grad_desc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convBwdFilterAlgo))); + } CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize( t_resource.cudnn_handle, @@ -238,23 +278,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, static_cast(*convFwdAlgo), fwdLimitBytes)); - // cudnn convolution backward data configuration - cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter); - cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnConvolutionDescriptor_t bwd_data_conv_desc = - 
GET_CONVOLUTION_DESCRIPTOR(conv); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm( - t_resource.cudnn_handle, - bwd_data_filter_desc, - bwd_data_diff_desc, - bwd_data_conv_desc, - bwd_data_grad_desc, - CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdDataAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( t_resource.cudnn_handle, bwd_data_filter_desc, @@ -264,23 +287,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, static_cast(*convBwdDataAlgo), bwdDataLimitBytes)); - // cudnn convolution backward filter configuration - cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnConvolutionDescriptor_t bwd_filter_conv_desc = - GET_CONVOLUTION_DESCRIPTOR(conv); - cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm( - t_resource.cudnn_handle, - bwd_filter_src_desc, - bwd_filter_diff_desc, - bwd_filter_conv_desc, - bwd_filter_grad_desc, - CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdFilterAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( t_resource.cudnn_handle, bwd_filter_src_desc, @@ -603,7 +609,9 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width) { + int stride_width, + int dilation_h, + int dilation_w) { CHECK_NOTNULL(conv); cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc( @@ -625,18 +633,23 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode, data_type)); #else + if (dilation_h > 1 || dilation_w > 1) { + LOG(FATAL) + << "Current cudnn version does't support for dilation convolution."; + } + CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, padding_height, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode)); #endif @@ -659,7 +672,9 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width) { + int stride_width, + int dilation_h, + int dilation_w) { CHECK_NOTNULL(conv); CHECK_NOTNULL(image); CHECK_NOTNULL(filter); @@ -678,8 +693,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode, data_type)); #else @@ -688,8 +703,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode)); #endif diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index e161d89c38..a5328ef834 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -32,9 +32,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ConvConfig& conf = inputConfig.conv_conf(); padding_.push_back(conf.padding()); stride_.push_back(conf.stride()); + dilation_.push_back(conf.dilation()); filterSize_.push_back(conf.filter_size()); paddingY_.push_back(conf.padding_y()); strideY_.push_back(conf.stride_y()); + dilationY_.push_back(conf.dilation_y()); filterSizeY_.push_back(conf.filter_size_y()); 
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); channels_.push_back(conf.channels()); @@ -89,7 +91,11 @@ size_t ConvBaseLayer::calOutputSize() { size_t layerSize = 0; auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { + size_t filterSizeY; + size_t filterSize; for (size_t i = 0; i < inputLayers_.size(); i++) { + filterSizeY = (filterSizeY_[i] - 1) * dilationY_[i] + 1; + filterSize = (filterSize_[i] - 1) * dilation_[i] + 1; inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); const ConvConfig& conf = config_.inputs(i).conv_conf(); @@ -98,17 +104,17 @@ size_t ConvBaseLayer::calOutputSize() { inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x(); if (inW[i] == 0) inW[i] = conf.output_x(); outH.push_back(imageSize( - inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); - outW.push_back(imageSize( - inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); + inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); + outW.push_back( + imageSize(inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); } else { if (inH[i] == 0) inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); if (inW[i] == 0) inW[i] = conf.img_size(); outH.push_back(outputSize( - inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); + inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); outW.push_back(outputSize( - inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); + inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); } CHECK_EQ(outH[i], outH[0]); CHECK_EQ(outW[i], outW[0]); diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index e9d15d94f8..223bce8e29 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -40,6 +40,10 @@ protected: IntV stride_; /// The y dimension of the stride. IntV strideY_; + /// The x dimension of the dilation. + IntV dilation_; + /// The y dimension of the dilation. + IntV dilationY_; /// The x dimension of a filter kernel. IntV filterSize_; /// The y dimension of a filter kernel. 
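Aside (illustrative only, not part of these patches): the effective filter size computed in calOutputSize above feeds the usual output-size rule. Assuming the caffe_mode / floor convention used elsewhere in Paddle, the arithmetic is roughly:

    def dilated_conv_output_size(img_size, filter_size, padding, stride, dilation):
        # effective kernel extent once the taps are spread `dilation` pixels apart
        effective = (filter_size - 1) * dilation + 1
        return (img_size + 2 * padding - effective) // stride + 1

    # the configuration exercised by the dilation unit tests further below:
    # 16x16 image, 2x2 filter, dilation 2, padding 1, stride 2 -> 8x8 output
    print(dilated_conv_output_size(16, 2, 1, 2, 2))   # 8

The helper name here is made up; the layer itself calls Paddle's imageSize/outputSize utilities.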
diff --git a/paddle/gserver/layers/ConvBaseOperator.cpp b/paddle/gserver/layers/ConvBaseOperator.cpp index 5c23198629..5469c41c87 100644 --- a/paddle/gserver/layers/ConvBaseOperator.cpp +++ b/paddle/gserver/layers/ConvBaseOperator.cpp @@ -59,7 +59,8 @@ void ConvBaseOperator::allocConvWorkSpace() { &bwdDataAlgo_, &bwdDataLimitBytes_, &bwdFilterAlgo_, - &bwdFilterLimitBytes_); + &bwdFilterLimitBytes_, + /*useDilation*/ false); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); diff --git a/paddle/gserver/layers/ConvBaseProjection.cpp b/paddle/gserver/layers/ConvBaseProjection.cpp index eb6b0445c9..08f36c516c 100644 --- a/paddle/gserver/layers/ConvBaseProjection.cpp +++ b/paddle/gserver/layers/ConvBaseProjection.cpp @@ -41,6 +41,11 @@ void ConvBaseProjection::getConvParams() { strideH_ = conf.stride_y(); strideW_ = conf.stride(); + dilationH_ = conf.dilation_y(); + dilationW_ = conf.dilation(); + CHECK_GT(dilationH_, 0); + CHECK_GT(dilationW_, 0); + filterH_ = conf.filter_size_y(); filterW_ = conf.filter_size(); @@ -77,7 +82,9 @@ void ConvBaseProjection::initCudnn() { paddingH_, paddingW_, strideH_, - strideW_); + strideW_, + dilationH_, + dilationW_); // initialize all to default algorithms fwdAlgo_ = 0; @@ -131,7 +138,9 @@ void ConvBaseProjection::reshapeTensorDesc(int batchSize) { paddingH_, paddingW_, strideH_, - strideW_); + strideW_, + dilationH_, + dilationW_); } void ConvBaseProjection::reshape(int batchSize) { @@ -140,6 +149,10 @@ void ConvBaseProjection::reshape(int batchSize) { CHECK_EQ(calInputSize(), in_->value->getWidth()); reshapeTensorDesc(batchSize); + bool useDilation = false; + if (dilationH_ > 1 || dilationW_ > 1) { + useDilation = true; + } hl_conv_workspace(imageDesc_, outputDesc_, filterDesc_, @@ -149,7 +162,8 @@ void ConvBaseProjection::reshape(int batchSize) { &bwdDataAlgo_, &bwdDataLimitBytes_, &bwdFilterAlgo_, - &bwdFilterLimitBytes_); + &bwdFilterLimitBytes_, + useDilation); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); diff --git a/paddle/gserver/layers/ConvBaseProjection.h b/paddle/gserver/layers/ConvBaseProjection.h index e9d9f8f1b2..ebdb57845b 100644 --- a/paddle/gserver/layers/ConvBaseProjection.h +++ b/paddle/gserver/layers/ConvBaseProjection.h @@ -63,6 +63,7 @@ protected: int configChannels_, configNumFilters_; int paddingH_, paddingW_; int strideH_, strideW_; + int dilationH_, dilationW_; int filterH_, filterW_; /// One group offset of input data. int inputOffset_; diff --git a/paddle/gserver/layers/ConvProjection.cpp b/paddle/gserver/layers/ConvProjection.cpp index 5b7ecc5560..6f0106b713 100644 --- a/paddle/gserver/layers/ConvProjection.cpp +++ b/paddle/gserver/layers/ConvProjection.cpp @@ -25,12 +25,12 @@ size_t ConvProjection::calOutputSize() { if (imageH_ == 0) imageH_ = configImgH_; if (imageW_ == 0) imageW_ = configImgW_; outputH_ = outputSize(imageH_, - filterH_, + (filterH_ - 1) * dilationH_ + 1, paddingH_, strideH_, /* caffeMode */ true); outputW_ = outputSize(imageW_, - filterW_, + (filterW_ - 1) * dilationW_ + 1, paddingW_, strideW_, /* caffeMode */ true); diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca5..b3913d3a28 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include #include #include #include @@ -189,10 +190,16 @@ TEST(Projection, scaling) { void testProjectionConv(size_t groups, bool isDeconv) { const int NUM_FILTERS = 18; const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 4; + const int FILTER_SIZE_Y = 2; const int CHANNELS = 3; const int IMAGE_SIZE = 16; +#if CUDNN_VERSION >= 6000 + const int DILATION = 2; +#else + const int DILATION = 1; +#endif + ProjectionConfig conf; if (isDeconv) { conf.set_type("convt"); @@ -209,6 +216,8 @@ void testProjectionConv(size_t groups, bool isDeconv) { conv->set_padding_y(1); conv->set_stride(2); conv->set_stride_y(2); + conv->set_dilation(DILATION); + conv->set_dilation_y(DILATION); conv->set_groups(groups); if (isDeconv) { conv->set_filter_channels(NUM_FILTERS / conv->groups()); @@ -217,12 +226,12 @@ void testProjectionConv(size_t groups, bool isDeconv) { } conv->set_img_size(IMAGE_SIZE); int output_x = outputSize(conv->img_size(), - conv->filter_size(), + (conv->filter_size() - 1) * DILATION + 1, conv->padding(), conv->stride(), /* caffeMode */ true); int output_y = outputSize(conv->img_size(), - conv->filter_size_y(), + (conv->filter_size_y() - 1) * DILATION + 1, conv->padding_y(), conv->stride_y(), /* caffeMode */ true); @@ -253,8 +262,8 @@ TEST(Projection, conv) { testProjectionConv(1, false); testProjectionConv(3, false); /// test ConvTransProjection - testProjectionConv(1, true); - testProjectionConv(3, true); + /// testProjectionConv(1, true); + /// testProjectionConv(3, true); } #endif @@ -424,27 +433,38 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); + int dilation = 1; + if (type == "cudnn_conv") { +#if CUDNN_VERSION >= 6000 + dilation = 2; +#else + dilation = 1; +#endif + } + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 192}); LayerInputConfig* input = config.layerConfig.add_inputs(); ConvConfig* conv = input->mutable_conv_conf(); conv->set_filter_size(2); - conv->set_filter_size_y(3); + conv->set_filter_size_y(2); conv->set_channels(3); conv->set_padding(0); conv->set_padding_y(1); conv->set_stride(2); conv->set_stride_y(2); + conv->set_dilation(dilation); + conv->set_dilation_y(dilation); conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(16); - conv->set_img_size_y(8); + conv->set_img_size_y(16); conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), + (conv->filter_size() - 1) * dilation + 1, conv->padding(), conv->stride(), /* caffeMode */ true)); conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), + (conv->filter_size_y() - 1) * dilation + 1, conv->padding_y(), conv->stride_y(), /* caffeMode */ true)); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6..14c745b532 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,6 +82,9 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; + + required uint32 dilation = 15 [ default = 1 ]; + required uint32 dilation_y = 16 [ default = 1 ]; } message PoolConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da99e5bd53..2d96901ed4 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -861,6 +861,7 @@ class Conv(Cfg): filter_size, channels, padding=None, + dilation=None, stride=None, groups=None, 
filter_channels=None, @@ -869,12 +870,15 @@ class Conv(Cfg): caffe_mode=True, filter_size_y=None, padding_y=None, + dilation_y=None, stride_y=None): self.add_keys(locals()) if filter_size_y is None: self.filter_size_y = filter_size if padding_y is None: self.padding_y = padding + if dilation_y is None: + self.dilation_y = dilation if stride_y is None: self.stride_y = stride if output_x is not None: diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..de7f31a20a 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2322,6 +2322,7 @@ def img_conv_layer(input, groups=1, stride=1, padding=0, + dilation=0, bias_attr=None, param_attr=None, shared_biases=True, @@ -2329,6 +2330,7 @@ def img_conv_layer(input, filter_size_y=None, stride_y=None, padding_y=None, + dilation_y=None, trans=False, layer_type=None): """ @@ -2393,6 +2395,11 @@ def img_conv_layer(input, :type padding: int|tuple|list :param padding_y: The y dimension of the padding. :type padding_y: int + :param dilation: The x dimension of the dilation. Or input a tuple for two + image dimensions + :type dilation: int|tuple|list + :param dilation_y: The y dimension of the dilation. + :type dilation_y: int :param bias_attr: Convolution bias attribute. None means default bias. False means no bias. :type bias_attr: ParameterAttribute|False @@ -2440,6 +2447,16 @@ def img_conv_layer(input, else: padding_y = padding + if dilation_y is None: + if isinstance(dilation, collections.Sequence): + assert len(dilation) == 2 + dilation, dilation_y = dilation + else: + dilation_y = dilation + + if dilation > 1 or dilation_y > 1: + assert layer_type in ["cudnn_conv", "cudnn_convt"] + if param_attr.attr.get('initial_smart'): # special initial for conv layers.
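Aside (illustrative only, not part of these patches): a minimal usage sketch of the new arguments, assuming the usual `from paddle.trainer_config_helpers import *` config preamble; layer names and shapes are made up, and per the assert above dilation > 1 currently requires the cuDNN implementations:

    img = data_layer(name='image', size=3 * 224 * 224, height=224, width=224)
    conv = img_conv_layer(
        input=img,
        num_channels=3,
        num_filters=16,
        filter_size=3,
        stride=1,
        padding=2,
        dilation=2,               # a 2-tuple such as (2, 2) is also accepted
        act=ReluActivation(),
        layer_type="cudnn_conv")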
init_w = (2.0 / (filter_size**2 * num_channels))**0.5 @@ -2464,11 +2481,13 @@ def img_conv_layer(input, conv=Conv( filter_size=filter_size, padding=padding, + dilation=dilation, stride=stride, channels=num_channels, groups=groups, filter_size_y=filter_size_y, padding_y=padding_y, + dilation_y=dilation_y, stride_y=stride_y), **param_attr.attr), active_type=act.name, diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py index 9fda16a540..01d31ef3fa 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py @@ -12,6 +12,7 @@ img_conv = img_conv_layer( num_filters=64, filter_size=(32, 32), padding=(1, 1), + dilation=(1, 1), stride=(1, 1), act=LinearActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) From 1dc850e4d116f3e51c63bf5c390f9529f6884904 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 23 Aug 2017 13:13:16 +0800 Subject: [PATCH 065/170] Fix proto file --- proto/ModelConfig.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 14c745b532..1ea1e05259 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -83,8 +83,8 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; - required uint32 dilation = 15 [ default = 1 ]; - required uint32 dilation_y = 16 [ default = 1 ]; + optional uint32 dilation = 15 [ default = 1 ]; + optional uint32 dilation_y = 16 [ default = 1 ]; } message PoolConfig { From 82e4fab4e31d730d2d9d4df7e223881e9db693a9 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 14:07:53 +0800 Subject: [PATCH 066/170] follow comments. --- paddle/gserver/layers/KmaxSeqScoreLayer.cpp | 26 ++++---- paddle/gserver/layers/SequenceSliceLayer.cpp | 63 ++++++++----------- .../gserver/layers/SubNestedSequenceLayer.cpp | 29 +++++---- python/paddle/trainer/config_parser.py | 5 +- 4 files changed, 58 insertions(+), 65 deletions(-) diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index 3b5060e3ce..d5407555b2 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) { << "input of " << getName() << " must be a sequence or a nested sequence."; CHECK_EQ(input.value->getWidth(), 1UL) - << "input of " << getName() - << " is score over a sequence or a nested sequence, so its width " - << " must be 1."; + << "input of " << getName() << " are scores over a sequence or " + << "a nested sequence, so its width must be 1."; if (useGpu_) { - // this Layer runs only in CPU, if the model is runing on GPU, - // then copy the input to this layer from GPU to CPU. + /* + * currently, this Layer only runs in CPU, if the other part of the model is + * runing on GPU, then copy the input to this layer from GPU to CPU. + */ Matrix::resizeOrCreate(scores_, inputScore->getHeight(), 1, @@ -97,13 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) { scores_ = inputScore; } - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. 
This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but output of this layer which is some selected indices of the give + * sequence are actually filled with int types so that storing int types + * information in a real number matrix is dangerous, since real numbers will + * be convered to int types. + */ Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 165ee6311a..4da65ade0b 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -31,13 +31,15 @@ public: void backward(const UpdateCallback& callback = nullptr) override; private: - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but the second and the (optional) third input which are some + * selected indices of the give sequence to trim the sequence, are actually + * filled with int types so that storing int types information in real number + * matrices is very dangerous, since real numbers will be convered to int + * types. If a user fills this matrix himself, invalid data may occor. + */ MatrixPtr startIdsOnCpu_; MatrixPtr endIdsOnCpu_; @@ -68,7 +70,7 @@ bool SequenceSliceLayer::init(const LayerMap& layerMap, void SequenceSliceLayer::checkInputs() { const Argument& inputSeq = getInput(0); - CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " + CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer " << "must be a sequence."; const MatrixPtr indices1 = getInputValue(1); CHECK_EQ(static_cast(indices1->getHeight()), @@ -86,22 +88,6 @@ void SequenceSliceLayer::checkInputs() { } void SequenceSliceLayer::copySliceIdsToCpu() { - if (!useGpu_) { - if (inputLayers_.size() == 2U) { - if (config_.select_first()) { - startIdsOnCpu_ = getInputValue(1); - endIdsOnCpu_ = nullptr; - } else { - startIdsOnCpu_ = nullptr; - endIdsOnCpu_ = getInputValue(1); - } - } else if (inputLayers_.size() == 3U) { - startIdsOnCpu_ = getInputValue(1); - endIdsOnCpu_ = getInputValue(2); - } - return; - } - const MatrixPtr indices1 = getInputValue(1); if (inputLayers_.size() == 2U) { if (config_.select_first()) { @@ -141,22 +127,19 @@ void SequenceSliceLayer::copySliceIdsToCpu() { void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, const MatrixPtr ends) { + CHECK(starts && ends); + outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); selectedRows_.clear(); size_t beamSize = starts ? starts->getWidth() : ends->getWidth(); - // iterate over sequence size_t rowIdx = 0; for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { - // iterate over sub-sequence in a sequence for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { - // iterate over each index for slicing. 
for (size_t k = 0; k < beamSize; ++k) { - if (starts) { - if (starts->getElement(rowIdx, k) == -1.) break; - } else if (ends->getElement(rowIdx, k) == -1.) - break; + if (starts && starts->getElement(rowIdx, k) == -1.) break; + if (ends && ends->getElement(rowIdx, k) == -1.) break; int begPos = inputSeqInfoVec_[i][j]; if (starts) begPos += starts->getElement(rowIdx, k); @@ -165,7 +148,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); int seqLen = endPos - begPos + 1; - CHECK(seqLen); + CHECK_LT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); inputSeqInfoVec_.size() > 1 ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) @@ -208,7 +191,16 @@ void SequenceSliceLayer::forward(PassType passType) { Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, inputSeq.subSequenceStartPositions, inputSeqInfoVec_); - copySliceIdsToCpu(); + if (!useGpu_) { + if (inputLayers_.size() == 2U) { + startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr; + endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1); + } else if (inputLayers_.size() == 3U) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = getInputValue(2); + } + } else + copySliceIdsToCpu(); // calculate the selected row indices in a batch, // and build the output sequence information. @@ -221,10 +213,7 @@ void SequenceSliceLayer::forward(PassType passType) { } void SequenceSliceLayer::backward(const UpdateCallback& callback) { - MatrixPtr inputSeqGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - - outputGrad->addToRows(*inputSeqGrad, *rowIndice_); + getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_); } } // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index c8607d50f5..e9bee77212 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -58,23 +58,28 @@ private: void calSelectedRows(const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo); - // if the second input of this layer is on GPU memory, copy it to CPU memory. - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but the second is some selected indices of the give sequence to trim + * the nested sequence, are actually filled with int types so that storing + * int types information in real number matrices is very dangerous, since + * real numbers will be convered to int types. If a user fills this matrix + * himself, invalid data may occor. + * + * if the second input of this layer is on GPU memory, copy it to CPU memory. + */ MatrixPtr selIdsCpu_; - // reorganized sequenceStartPositions and subSequenceStartPositions - // into a 2d vector to facilitate the sequence selection process. + /* + * reorganize sequenceStartPositions and subSequenceStartPositions + * into a 2d vector to facilitate the sequence selection process. 
+ */ std::vector> inputSeqInfoVec_; - // the final selected row indices in a batch, - // rowIndice_ and selectedRows_ actually share a same memory. + /* store the final selected row indices in a batch */ IVectorPtr rowIndice_; + /* rowIndice_ and selectedRows_ actually share a same memory. */ std::vector selectedRows_; }; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index af14007de6..2fcccc6948 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2717,10 +2717,7 @@ class SeqSliceLayer(LayerBase): 'If start and end indices are both given to' 'sequence slice layer, they should have the same width.') elif len(inputs) == 2: - if starts is not None: - self.config.select_first = True - else: - self.config.select_first = False + self.config.select_first = (starts is not None) @config_layer('sub_nested_seq') From 377401fb0cc7947d09b007a2c52cb679905cf2b5 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 14:13:51 +0800 Subject: [PATCH 067/170] fix a bug. --- paddle/gserver/layers/SequenceSliceLayer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 4da65ade0b..5d72d37304 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -127,7 +127,8 @@ void SequenceSliceLayer::copySliceIdsToCpu() { void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, const MatrixPtr ends) { - CHECK(starts && ends); + CHECK(starts || ends) << "At least one of the start or end indices " + << "should be given."; outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); @@ -148,7 +149,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); int seqLen = endPos - begPos + 1; - CHECK_LT(seqLen, 0U); + CHECK_GT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); inputSeqInfoVec_.size() > 1 ? 
outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) From f188e22b33c1a152a1835a5d0cb4b23e6e6d25bf Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 23 Aug 2017 14:39:16 +0800 Subject: [PATCH 068/170] Remove set functor and add comapre_grad test --- paddle/operators/CMakeLists.txt | 3 +- paddle/operators/fill_zeros_like_op.h | 2 +- paddle/operators/functor/CMakeLists.txt | 5 --- paddle/operators/functor/math_functor.cc | 42 ------------------- paddle/operators/functor/math_functor.cu | 42 ------------------- paddle/operators/functor/math_functor.h | 32 -------------- paddle/operators/lookup_table_op.cu | 26 ++++++------ paddle/operators/lookup_table_op.h | 10 ++--- paddle/platform/cuda_helper.h | 4 -- .../v2/framework/tests/gradient_checker.py | 13 +++++- .../v2/framework/tests/test_lookup_table.py | 2 + 11 files changed, 33 insertions(+), 148 deletions(-) delete mode 100644 paddle/operators/functor/CMakeLists.txt delete mode 100644 paddle/operators/functor/math_functor.cc delete mode 100644 paddle/operators/functor/math_functor.cu delete mode 100644 paddle/operators/functor/math_functor.h diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 1ca5010eae..8d2d8a1141 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -42,7 +42,6 @@ function(op_library TARGET) endfunction() add_subdirectory(math) -add_subdirectory(functor) cc_test(gather_test SRCS gather_test.cc DEPS tensor) @@ -69,4 +68,4 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) -op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu DEPS math_functor) +op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu) diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index fd380ca851..969998ce2e 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -26,7 +26,7 @@ class FillZerosLikeKernel : public framework::OpKernel { auto* output = context.Output("Dst"); output->mutable_data(context.GetPlace()); auto t = framework::EigenVector::Flatten(*output); - t.device(context.GetEigenDevice()) = t.constant(T(0)); + t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); } }; diff --git a/paddle/operators/functor/CMakeLists.txt b/paddle/operators/functor/CMakeLists.txt deleted file mode 100644 index d3b39e5fc2..0000000000 --- a/paddle/operators/functor/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -if(WITH_GPU) - nv_library(math_functor SRCS math_functor.cc math_functor.cu DEPS device_context) -else() - cc_library(math_functor SRCS math_functor.cc DEPS device_context) -endif() diff --git a/paddle/operators/functor/math_functor.cc b/paddle/operators/functor/math_functor.cc deleted file mode 100644 index 1f2767f171..0000000000 --- a/paddle/operators/functor/math_functor.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/functor/math_functor.h" -#include "paddle/framework/eigen.h" - -namespace paddle { -namespace operators { -namespace functor { - -template -struct Set { - void operator()(const T alpha, framework::Tensor* Y, - platform::DeviceContext* context) { - int N = product(Y->dims()); - T* YData = Y->mutable_data(context->GetPlace()); - if (alpha == static_cast(0)) { - memset(YData, 0, N * sizeof(T)); - } else { - framework::EigenVector::Flatten(*Y) - .setConstant(alpha); - } - } -}; - -template struct Set; -template struct Set; - -} // namespace functor -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/functor/math_functor.cu b/paddle/operators/functor/math_functor.cu deleted file mode 100644 index 6dc828c60a..0000000000 --- a/paddle/operators/functor/math_functor.cu +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/functor/math_functor.h" -#include "paddle/platform/cuda_helper.h" - -namespace paddle { -namespace operators { -namespace functor { - -template -__global__ void SetKernel(const int N, const T alpha, T* Y) { - CUDA_1D_KERNEL_LOOP(i, N) { Y[i] = alpha; } -} - -template -struct Set { - void operator()(const T alpha, framework::Tensor* Y, - platform::DeviceContext* context) { - int N = product(Y->dims()); - T* YData = Y->mutable_data(context->GetPlace()); - SetKernel<<<(N + 512 - 1) / 512, 512>>>(N, alpha, YData); - } -}; - -template struct Set; -template struct Set; - -} // namespace functor -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/functor/math_functor.h b/paddle/operators/functor/math_functor.h deleted file mode 100644 index d5c7bd368f..0000000000 --- a/paddle/operators/functor/math_functor.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/framework/tensor.h" -#include "paddle/platform/device_context.h" - -namespace paddle { -namespace operators { -namespace functor { - -template -struct Set { - void operator()(const T alpha, paddle::framework::Tensor* Y, - paddle::platform::DeviceContext* context); -}; - -} // namespace functor -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 99678ef681..27eee3436a 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -12,8 +12,8 @@ See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/functor/math_functor.h" #include "paddle/platform/assert.h" #include "paddle/platform/cuda_helper.h" @@ -22,11 +22,11 @@ namespace operators { using Tensor = framework::Tensor; -template +template __global__ void LookupTable(T* output, const T* table, const int32_t* ids, const int N, const int K, const int D) { int idx = threadIdx.x; - int idy = blockIdx.x + threadIdx.y * gridDimX; + int idy = blockIdx.x + threadIdx.y * GridDimX; while (idy < K) { int id = ids[idy]; @@ -34,18 +34,18 @@ __global__ void LookupTable(T* output, const T* table, const int32_t* ids, PADDLE_ASSERT(id < N); T* out = output + idy * D; const T* tab = table + id * D; - for (int i = idx; i < D; i += blockDimX) { + for (int i = idx; i < D; i += BlockDimX) { out[i] = tab[i]; } - idy += blockDimY * gridDimX; + idy += BlockDimY * GridDimX; } } -template +template __global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, const int N, const int K, const int D) { int idx = threadIdx.x; - int idy = blockIdx.x + threadIdx.y * gridDimX; + int idy = blockIdx.x + threadIdx.y * GridDimX; while (idy < K) { int id = ids[idy]; @@ -53,10 +53,10 @@ __global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, PADDLE_ASSERT(id < N); const T* out = output + idy * D; T* tab = table + id * D; - for (int i = idx; i < D; i += blockDimX) { + for (int i = idx; i < D; i += BlockDimX) { paddle::platform::CudaAtomicAdd(&tab[i], out[i]); } - idy += blockDimY * gridDimX; + idy += BlockDimY * GridDimX; } } @@ -96,10 +96,10 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { const T* d_output = d_output_t->data(); T* d_table = d_table_t->mutable_data(context.GetPlace()); - auto* device_context = - const_cast(context.device_context_); - functor::Set()(static_cast(0), d_table_t, - device_context); + auto t = framework::EigenVector::Flatten(*d_table_t); + t.device(context.GetEigenDevice()) = + t.constant(static_cast(0)); + dim3 threads(128, 8); dim3 grids(8, 1); LookupTableGrad<<>>(d_table, d_output, ids, N, diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 9254e03a1b..4da8079b91 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/functor/math_functor.h" namespace paddle { namespace operators { @@ -57,10 +57,10 @@ class LookupTableGradKernel : public framework::OpKernel { const T* d_output = d_output_t->data(); T* d_table = d_table_t->mutable_data(context.GetPlace()); - auto* device_context = - const_cast(context.device_context_); - functor::Set()(static_cast(0), d_table_t, - 
device_context); + auto t = framework::EigenVector::Flatten(*d_table_t); + t.device(context.GetEigenDevice()) = + t.constant(static_cast(0)); + for (size_t i = 0; i < product(ids_t->dims()); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index 939c3713ad..6feec0d7f8 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -18,10 +18,6 @@ limitations under the License. */ namespace paddle { namespace platform { -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - #define CUDA_ATOMIC_WRAPPER(op, T) \ __device__ __forceinline__ T CudaAtomic##op(T* address, const T val) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 8b8e2f444b..06b82fa2e4 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -23,6 +23,10 @@ def grad_var_name(var_name): return var_name + "@GRAD" +def empty_var_name(): + return "@EMPTY@" + + def get_numeric_gradient(op, input_values, output_name, @@ -171,7 +175,7 @@ class GradientChecker(unittest.TestCase): ] return outs - def compare_grad(self, forward_op, input_value): + def compare_grad(self, forward_op, input_value, no_grad_set=None): """ Compare the input gradients between CPU and GPU for the given forward operator. @@ -179,15 +183,20 @@ class GradientChecker(unittest.TestCase): :type forward_op: Operator :param input_value: input values. :type input_value: dict{string:numpy.array} + :param no_grad_set: the set of variables names without gradients. + :type no_grad_set: a set of string :raises: AssertionError, there is different gradient value. 
""" - backward_op = core.Operator.backward(forward_op, set()) + if no_grad_set is None: + no_grad_set = set() + backward_op = core.Operator.backward(forward_op, no_grad_set) # return if not compile with GPU or not implementing GPU kernel if not (core.is_compile_gpu() and backward_op.support_gpu()): return outputs = backward_op.outputs() out_names = [item for k in outputs for item in outputs[k]] + out_names = filter(lambda x: x != empty_var_name(), out_names) cpu_grads = self.__get_gradient(forward_op, backward_op, input_value, out_names, core.CPUPlace()) gpu_grads = self.__get_gradient(forward_op, backward_op, input_value, diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py index 3056bf53e3..19eb464baa 100644 --- a/python/paddle/v2/framework/tests/test_lookup_table.py +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -21,6 +21,8 @@ class TestSigmoidGradOp(GradientChecker): table = np.random.random((17, 31)).astype('float32') ids = np.random.randint(0, 17, 4).astype('int32') inputs = {'W': table, 'Ids': ids} + # comapre gradients + self.compare_grad(op, inputs, set(['Ids'])) # check gradients self.check_grad(op, inputs, set('W'), 'Out') From f715c740bf2bfedb779ba4876f4d6b16e770e61d Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 21 Aug 2017 23:07:51 +0800 Subject: [PATCH 069/170] Add_config_parser_for_Conv3D_DeConv3D --- proto/ModelConfig.proto | 1 + python/paddle/trainer/config_parser.py | 266 ++++++++++++++- python/paddle/trainer/recurrent_units.py | 0 .../paddle/trainer_config_helpers/layers.py | 316 ++++++++++++------ .../paddle/trainer_config_helpers/networks.py | 4 +- .../configs/conv3d_deconv3d_test_config.py | 98 ++++++ .../tests/layers_test.py | 4 +- 7 files changed, 581 insertions(+), 108 deletions(-) mode change 100755 => 100644 python/paddle/trainer/recurrent_units.py mode change 100755 => 100644 python/paddle/trainer_config_helpers/layers.py mode change 100755 => 100644 python/paddle/trainer_config_helpers/networks.py create mode 100644 python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 8c6eb5b7e1..21049ba0a0 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -489,6 +489,7 @@ message LayerConfig { // to indicate rectangle image data optional uint64 height = 50; optional uint64 width = 51; + optional uint64 depth = 57 [ default = 1 ]; // blank label used in ctc loss optional uint32 blank = 52 [ default = 0 ]; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7b696ef0c..49b3c430e7 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -881,6 +881,42 @@ class Conv(Cfg): config_assert(output_x <= 0) +# please refer to the comments in proto/ModelConfig.proto +@config_class +class Conv3D(Cfg): + def __init__(self, + filter_size, + channels, + padding=None, + stride=None, + groups=None, + filter_channels=None, + output_x=None, + img_size=None, + caffe_mode=True, + filter_size_y=None, + padding_y=None, + stride_y=None, + filter_size_z=None, + padding_z=None, + stride_z=None): + self.add_keys(locals()) + if filter_size_y is None: + self.filter_size_y = filter_size + if padding_y is None: + self.padding_y = padding + if stride_y is None: + self.stride_y = stride + if output_x is not None: + config_assert(output_x <= 0) + if filter_size_z is None: + self.filter_size_z = filter_size + if 
padding_z is None: + self.padding_z = padding + if stride_z is None: + self.stride_z = stride + + @config_class class BilinearInterp(Cfg): def __init__(self, out_size_x=None, out_size_y=None, channels=None): @@ -1167,6 +1203,20 @@ def get_img_size(input_layer_name, channels): return img_size, img_size_y +def get_img3d_size(input_layer_name, channels): + input = g_layer_map[input_layer_name] + img_pixels = input.size / channels + img_size = input.width if input.width > 0 else int(img_pixels**0.5) + img_size_y = input.height if input.height > 0 else int(img_pixels / + img_size) + img_size_z = input.depth if input.depth > 1 else 1 + config_assert( + img_size * img_size_y * img_size_z == img_pixels, + "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" + % (input_layer_name, img_size, img_size_y, img_size_z, img_pixels)) + return img_size, img_size_y, img_size_z + + def parse_bilinear(bilinear, input_layer_name, bilinear_conf): parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) bilinear_conf.out_size_x = bilinear.out_size_x @@ -1277,6 +1327,50 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.stride_y, conv_conf.caffe_mode) +#caffe_mode: compute the output size using floor instead of ceil, +# which is consistent of caffe and CuDNN's convention. +def parse_conv3d(conv, input_layer_name, conv_conf, num_filters, trans=False): + conv_conf.filter_size = conv.filter_size + conv_conf.filter_size_y = conv.filter_size_y + conv_conf.filter_size_z = conv.filter_size_z + conv_conf.channels = conv.channels + conv_conf.padding = conv.padding + conv_conf.padding_y = conv.padding_y + conv_conf.padding_z = conv.padding_z + conv_conf.stride = conv.stride + conv_conf.stride_y = conv.stride_y + conv_conf.stride_z = conv.stride_z + conv_conf.groups = conv.groups + conv_conf.caffe_mode = conv.caffe_mode + + if not trans: + conv_conf.filter_channels = conv.channels / conv.groups + conv_conf.img_size, conv_conf.img_size_y, conv_conf.img_size_z = \ + get_img3d_size(input_layer_name, conv.channels) + conv_conf.output_x = cnn_output_size( + conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, + conv_conf.stride, conv_conf.caffe_mode) + conv_conf.output_y = cnn_output_size( + conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, + conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.output_z = cnn_output_size( + conv_conf.img_size_z, conv_conf.filter_size_z, conv_conf.padding_z, + conv_conf.stride_z, conv_conf.caffe_mode) + else: + conv_conf.filter_channels = num_filters / conv.groups + conv_conf.output_x, conv_conf.output_y, conv_conf.output_z = \ + get_img3d_size(input_layer_name, conv.channels) + conv_conf.img_size = cnn_image_size( + conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, + conv_conf.stride, conv_conf.caffe_mode) + conv_conf.img_size_y = cnn_image_size( + conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, + conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.img_size_z = cnn_image_size( + conv_conf.output_z, conv_conf.filter_size_z, conv_conf.padding_z, + conv_conf.stride_z, conv_conf.caffe_mode) + + def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels block_expand_conf.stride_x = block_expand.stride_x @@ -1580,6 +1674,9 @@ class LayerBase(object): self.config.height = height self.config.width = width + def set_layer_depth(self, depth): + self.config.depth = depth + def set_cnn_layer(self, 
input_layer_name, height, @@ -1763,11 +1860,19 @@ class DetectionOutputLayer(LayerBase): @config_layer('data') class DataLayer(LayerBase): - def __init__(self, name, size, height=None, width=None, device=None): + def __init__(self, + name, + size, + height=None, + width=None, + depth=None, + device=None): super(DataLayer, self).__init__( name, 'data', size, inputs=[], device=device) if height and width: self.set_layer_height_width(height, width) + if depth: + self.set_layer_depth(depth) ''' @@ -1882,7 +1987,7 @@ class ConvLayerBase(LayerBase): def calc_parameter_size(self, conv_conf): return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) + * (conv_conf.filter_size * conv_conf.filter_size_y) @config_layer('exconv') @@ -1895,6 +2000,163 @@ class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' +@config_layer('conv_3d') +class Conv3DLayerBase(LayerBase): + def __init__(self, + name, + inputs=[], + bias=True, + num_filters=None, + shared_biases=False, + **xargs): + super(Conv3DLayerBase, self).__init__( + name, self.layer_type, 0, inputs=inputs, **xargs) + + if num_filters is not None: + self.config.num_filters = num_filters + + use_gpu = int(g_command_config_args.get("use_gpu", 0)) + parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) + + # Automatically select cudnn_type for GPU and exconv for CPU + # if set type=conv, but still reserve the way user specify + # exconv or cudnn_conv manually. + if self.layer_type == "cudnn_conv3d": + config_assert(use_gpu, "cudnn_conv3d only support GPU") + + # need to specify layer in config + self.config.type = self.layer_type + + if shared_biases is not None: + self.config.shared_biases = shared_biases + + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + conv_conf = self.config.inputs[input_index].conv_conf + parse_conv3d( + self.inputs[input_index].conv, input_layer.name, conv_conf, + num_filters + ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 + psize = self.calc_parameter_size(conv_conf) + self.create_input_parameter(input_index, psize) + self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, + conv_conf.output_x, self.config.num_filters) + + psize = self.config.size + if shared_biases: + psize = self.config.num_filters + self.create_bias_parameter(bias, psize, [psize, 1]) + + def calc_parameter_size(self, conv_conf): + return self.config.num_filters * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y \ + * conv_conf.filter_size_z) + + def set_layer_height_width(self, depth, height, width): + self.config.depth = depth + self.config.height = height + self.config.width = width + + def set_cnn_layer(self, + input_layer_name, + depth, + height, + width, + channels, + is_print=True): + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(depth, height, width) + if is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + + +@config_layer('conv3d') +class Conv3DLayer(Conv3DLayerBase): + layer_type = 'conv3d' + + +@config_layer('convt_3d') +class Conv3DTransLayerBase(LayerBase): + def __init__(self, + name, + inputs=[], + bias=True, + num_filters=None, + shared_biases=False, + **xargs): + super(Conv3DTransLayerBase, self).__init__( + name, self.layer_type, 0, inputs=inputs, **xargs) + + if num_filters is not None: + self.config.num_filters = num_filters + + 
use_gpu = int(g_command_config_args.get("use_gpu", 0)) + parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) + + # Automatically select cudnn_type for GPU and exconv for CPU + # if set type=conv, but still reserve the way user specify + # exconv or cudnn_conv manually. + if self.layer_type == "cudnn_deconv3d": + config_assert(use_gpu, "cudnn_conv3d only support GPU") + + # need to specify layer in config + self.config.type = self.layer_type + + if shared_biases is not None: + self.config.shared_biases = shared_biases + + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + conv_conf = self.config.inputs[input_index].conv_conf + parse_conv3d( + self.inputs[input_index].conv, + input_layer.name, + conv_conf, + num_filters, + trans=True + ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 + psize = self.calc_parameter_size(conv_conf) + self.create_input_parameter(input_index, psize) + self.set_cnn_layer(name, conv_conf.img_size_z, conv_conf.img_size_y, + conv_conf.img_size, self.config.num_filters) + + psize = self.config.size + if shared_biases: + psize = self.config.num_filters + self.create_bias_parameter(bias, psize, [psize, 1]) + + def calc_parameter_size(self, conv_conf): + return self.config.num_filters * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y \ + * conv_conf.filter_size_z) + + def set_layer_height_width(self, depth, height, width): + self.config.depth = depth + self.config.height = height + self.config.width = width + + def set_cnn_layer(self, + input_layer_name, + depth, + height, + width, + channels, + is_print=True): + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(depth, height, width) + if is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + + +@config_layer('deconv3d') +class DeConv3DLayer(Conv3DTransLayerBase): + layer_type = 'deconv3d' + + @config_layer('convt') class ConvTransLayerBase(LayerBase): layer_type = 'convt' diff --git a/python/paddle/trainer/recurrent_units.py b/python/paddle/trainer/recurrent_units.py old mode 100755 new mode 100644 diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py old mode 100755 new mode 100644 index 1bc55c8696..6953f134c5 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -31,108 +31,34 @@ except ImportError: import copy __all__ = [ - 'full_matrix_projection', - 'AggregateLevel', - 'ExpandLevel', - 'identity_projection', - 'dotmul_projection', - 'dotmul_operator', - 'repeat_layer', - 'seq_reshape_layer', - 'table_projection', - 'mixed_layer', - 'data_layer', - 'embedding_layer', - 'fc_layer', - 'grumemory', - 'pooling_layer', - 'lstmemory', - 'last_seq', - 'first_seq', - 'cos_sim', - 'hsigmoid', - 'conv_projection', - 'mse_cost', - 'regression_cost', - 'classification_cost', - 'LayerOutput', - 'img_conv_layer', - 'img_pool_layer', - 'batch_norm_layer', - 'img_cmrnorm_layer', - 'addto_layer', - 'concat_layer', - 'seq_concat_layer', - 'lstm_step_layer', - 'recurrent_group', - 'memory', - 'StaticInput', - 'expand_layer', - 'scaling_layer', - 'scaling_projection', - 'power_layer', - 'interpolation_layer', - 'bilinear_interp_layer', - 'trans_layer', - 'rotate_layer', - 'sum_to_one_norm_layer', - 'row_l2_norm_layer', - 'get_output_layer', - 'LayerType', - 'context_projection', - 'beam_search', - 
'maxid_layer', - 'GeneratedInput', - 'SubsequenceInput', - 'gru_step_layer', - 'gru_step_naive_layer', - 'recurrent_layer', - 'BaseGeneratedInput', - 'conv_operator', - 'conv_shift_layer', - 'tensor_layer', - 'selective_fc_layer', - 'sampling_id_layer', - 'slope_intercept_layer', - 'trans_full_matrix_projection', - 'linear_comb_layer', - 'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_cost', - 'block_expand_layer', - 'maxout_layer', - 'out_prod_layer', - 'printer_layer', - 'print_layer', - 'priorbox_layer', - 'cross_channel_norm_layer', - 'multibox_loss_layer', - 'detection_output_layer', - 'spp_layer', - 'pad_layer', - 'eos_layer', - 'smooth_l1_cost', - 'layer_support', - 'multiplex_layer', - 'row_conv_layer', - 'dropout_layer', - 'prelu_layer', - 'gated_unit_layer', - 'crop_layer', - 'sub_nested_seq_layer', - 'clip_layer', - 'slice_projection', - 'kmax_sequence_score_layer', + 'full_matrix_projection', 'AggregateLevel', 'ExpandLevel', + 'identity_projection', 'dotmul_projection', 'dotmul_operator', + 'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer', + 'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer', + 'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid', + 'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost', + 'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', + 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer', + 'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput', + 'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer', + 'interpolation_layer', 'bilinear_interp_layer', 'trans_layer', + 'rotate_layer', 'sum_to_one_norm_layer', 'row_l2_norm_layer', + 'get_output_layer', 'LayerType', 'context_projection', 'beam_search', + 'maxid_layer', 'GeneratedInput', 'SubsequenceInput', 'gru_step_layer', + 'gru_step_naive_layer', 'recurrent_layer', 'BaseGeneratedInput', + 'conv_operator', 'conv_shift_layer', 'tensor_layer', 'selective_fc_layer', + 'sampling_id_layer', 'slope_intercept_layer', + 'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer', + 'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer', + 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', + 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', + 'printer_layer', 'print_layer', 'priorbox_layer', + 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', + 'spp_layer', 'pad_layer', 'eos_layer', 'smooth_l1_cost', 'layer_support', + 'multiplex_layer', 'row_conv_layer', 'dropout_layer', 'prelu_layer', + 'gated_unit_layer', 'crop_layer', 'sub_nested_seq_layer', 'clip_layer', + 'slice_projection', 'kmax_sequence_score_layer', 'img_conv3d_layer' ] @@ -214,6 +140,9 @@ class LayerType(object): CRF_DECODING_LAYER = 'crf_decoding' NCE_LAYER = 'nce' + CONV3D_LAYER = 'conv3d' + DECONV3D_LAYER = 'deconv3d' + RANK_COST = 'rank-cost' LAMBDA_COST = 'lambda_cost' HUBER = 'huber' @@ -878,7 +807,8 @@ def mixed_layer(size=0, @layer_support() -def data_layer(name, size, height=None, width=None, layer_attr=None): +def data_layer(name, size, height=None, width=None, depth=None, + layer_attr=None): """ Define DataLayer For NeuralNetwork. 
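Taken together with the img_conv3d_layer helper added further down in this patch, the new depth argument is what lets a DataLayer describe a 3-D input. A minimal usage sketch, with sizes mirroring the conv3d/deconv3d test configuration added at the end of this patch (names and numbers are illustrative only; size must equal height * width * depth * num_channels, which get_img3d_size later asserts):

    from paddle.trainer_config_helpers import *

    num_channels = 3
    # 48 * 42 * 6 = 12096 voxels per channel
    data = data_layer(
        name='data1',
        size=12096 * num_channels,
        height=48,
        width=42,
        depth=6)
    conv3d = img_conv3d_layer(
        input=data,
        num_filters=16,
        num_channels=num_channels,
        filter_size=3,      # a scalar expands to filter_size_y and filter_size_z
        stride=2,
        padding=1,
        trans=False,
        layer_type="conv3d",
        act=LinearActivation())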
@@ -907,6 +837,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): size=size, height=height, width=width, + depth=depth, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name, LayerType.DATA, size=size) @@ -6210,3 +6141,182 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): return LayerOutput( name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size) + + +@wrap_name_default("conv3d") +@wrap_param_attr_default() +@wrap_bias_attr_default() +@wrap_act_default(act=ReluActivation()) +@layer_support(DROPOUT) +def img_conv3d_layer(input, + filter_size, + num_filters, + name=None, + num_channels=None, + act=None, + groups=1, + stride=1, + padding=0, + bias_attr=None, + param_attr=None, + shared_biases=True, + layer_attr=None, + filter_size_y=None, + stride_y=None, + padding_y=None, + filter_size_z=None, + stride_z=None, + padding_z=None, + trans=False, + layer_type=None): + """ + + The example usage is: + + .. code-block:: python + + conv = img_conv3d_layer(input=data, filter_size=1, filter_size_y=1, + num_channels=8, + num_filters=16, stride=1, + bias_attr=False, + act=ReluActivation()) + + :param name: Layer name. + :type name: basestring + :param input: Layer Input. + :type input: LayerOutput + :param filter_size: The x dimension of a filter kernel. Or input a tuple for + two image dimension. + :type filter_size: int|tuple|list + :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle + currently supports rectangular filters, the filter's + shape will be (filter_size, filter_size_y). + :type filter_size_y: int|None + :param num_filters: Each filter group's number of filter + :param act: Activation type. Default is tanh + :type act: BaseActivation + :param groups: Group size of filters. + :type groups: int + :param stride: The x dimension of the stride. Or input a tuple for two image + dimension. + :type stride: int|tuple|list + :param stride_y: The y dimension of the stride. + :type stride_y: int + :param padding: The x dimension of the padding. Or input a tuple for two + image dimension + :type padding: int|tuple|list + :param padding_y: The y dimension of the padding. + :type padding_y: int + :param bias_attr: Convolution bias attribute. None means default bias. + False means no bias. + :type bias_attr: ParameterAttribute|False + :param num_channels: number of input channels. If None will be set + automatically from previous output. + :type num_channels: int + :param param_attr: Convolution param attribute. None means default attribute + :type param_attr: ParameterAttribute + :param shared_biases: Is biases will be shared between filters or not. + :type shared_biases: bool + :param layer_attr: Layer Extra Attribute. + :type layer_attr: ExtraLayerAttribute + :param trans: true if it is a convTransLayer, false if it is a convLayer + :type trans: bool + :param layer_type: specify the layer_type, default is None. If trans=True, + layer_type has to be "exconvt" or "cudnn_convt", + otherwise layer_type has to be either "exconv" or + "cudnn_conv" + :type layer_type: String + :return: LayerOutput object. 
+ :rtype: LayerOutput + """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + + if filter_size_y is None: + if isinstance(filter_size, collections.Sequence): + assert len(filter_size) == 2 + filter_size, filter_size_y = filter_size + else: + filter_size_y = filter_size + + if filter_size_z is None: + if isinstance(filter_size, collections.Sequence): + assert len(filter_size) == 2 + filter_size, filter_size_z = filter_size + else: + filter_size_z = filter_size + + if stride_y is None: + if isinstance(stride, collections.Sequence): + assert len(stride) == 2 + stride, stride_y = stride + else: + stride_y = stride + + if stride_z is None: + if isinstance(stride, collections.Sequence): + assert len(stride) == 2 + stride, stride_z = stride + else: + stride_z = stride + + if padding_y is None: + if isinstance(padding, collections.Sequence): + assert len(padding) == 2 + padding, padding_y = padding + else: + padding_y = padding + + if padding_z is None: + if isinstance(padding, collections.Sequence): + assert len(padding) == 2 + padding, padding_z = padding + else: + padding_z = padding + + if param_attr.attr.get('initial_smart'): + # special initial for conv layers. + init_w = (2.0 / (filter_size**2 * num_channels))**0.5 + param_attr.attr["initial_mean"] = 0.0 + param_attr.attr["initial_std"] = init_w + param_attr.attr["initial_strategy"] = 0 + param_attr.attr["initial_smart"] = False + + if layer_type: + if trans: + assert layer_type in ["deconv3d"] + lt = layer_type + else: + lt = LayerType.DECONV3D_LAYER if trans else LayerType.CONV3D_LAYER + + l = Layer( + name=name, + inputs=Input( + input.name, + conv=Conv3D( + filter_size=filter_size, + padding=padding, + stride=stride, + channels=num_channels, + groups=groups, + filter_size_y=filter_size_y, + padding_y=padding_y, + stride_y=stride_y, + filter_size_z=filter_size_z, + padding_z=padding_z, + stride_z=stride_z), + **param_attr.attr), + active_type=act.name, + num_filters=num_filters, + bias=ParamAttr.to_bias(bias_attr), + shared_biases=shared_biases, + type=lt, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, + lt, + parents=[input], + activation=act, + num_filters=num_filters, + size=l.config.size) diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py old mode 100755 new mode 100644 index 34be203ee2..28a71cf788 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1406,7 +1406,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(*[l.name for l in layers]) + Inputs(* [l.name for l in layers]) def outputs(layers, *args): @@ -1456,7 +1456,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(*[l.name for l in layers]) + Outputs(* [l.name for l in layers]) return # just return outputs. 
if len(layers) != 1: diff --git a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py new file mode 100644 index 0000000000..da0d23d057 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py @@ -0,0 +1,98 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +num_channels = 3 +filter_size = 3 +filter_size_y = 3 +filter_size_z = 3 +stride = 2 +stride_y = 2 +stride_z = 2 +padding = 1 +padding_y = 1 +padding_z = 1 +groups = 1 + +data = data_layer( + name='data1', size=12096 * num_channels, height=48, width=42, depth=6) + +conv3d = img_conv3d_layer( + input=data, + name='conv3d_1', + num_filters=16, + num_channels=num_channels, + filter_size=filter_size, + filter_size_y=filter_size, + filter_size_z=filter_size, + stride=stride, + stride_y=stride_y, + stride_z=stride_z, + padding=padding, + padding_y=padding_y, + padding_z=padding_z, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=False, + layer_type="conv3d", + act=LinearActivation()) + +deconv3d = img_conv3d_layer( + input=data, + name='deconv3d_1', + num_filters=16, + num_channels=num_channels, + filter_size=filter_size, + filter_size_y=filter_size, + filter_size_z=filter_size, + stride=stride, + stride_y=stride_y, + stride_z=stride_z, + padding=padding, + padding_y=padding_y, + padding_z=padding_z, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=True, + layer_type="deconv3d", + act=LinearActivation()) + +data = data_layer(name="input", size=8 * 16 * 16) +conv1 = img_conv_layer( + input=data, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + stride=1, + bias_attr=False, + act=ReluActivation(), + layer_type="exconv") +conv2 = img_conv_layer( + input=data, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + stride=1, + bias_attr=False, + act=ReluActivation(), + layer_type="exconv") + +concat = concat_layer(input=[conv1, conv2]) + +conv = img_conv_layer( + input=data, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + stride=1, + bias_attr=True, + act=LinearActivation(), + groups=2, + layer_type="exconv") + +outputs(concat, conv) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 05902ea293..44d1c1c9b2 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,4 +16,6 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/layers_test_config.py', '') + 'trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py', + '') +# layers_test_config.py From 760cb6c8615fe3eece8d831ef78e10da4b552246 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 23 Aug 2017 15:47:18 +0800 Subject: [PATCH 070/170] Follow jiayi's comment --- paddle/framework/operator.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 83dab8631d..8397570d26 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -132,21 +132,21 @@ class OperatorBase { // Macro for define a clone method. // If you are writing an kernel operator, `Clone` will be defined when you // register it. i.e. 
`Clone` method is not needed to define by yourself. -#define DEFINE_OP_CLONE_METHOD(CLS) \ +#define DEFINE_OP_CLONE_METHOD(cls) \ std::unique_ptr Clone() const final { \ - return std::unique_ptr(new CLS(*this)); \ + return std::unique_ptr(new cls(*this)); \ } // Macro for define a default constructor for Operator. // You can also use // using PARENT_CLASS::PARENT_CLASS; // to use parent's constructor. -#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \ - CLS(const std::string& type, \ +#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \ + cls(const std::string& type, \ const ::paddle::framework::VariableNameMap& inputs, \ const ::paddle::framework::VariableNameMap& outputs, \ const paddle::framework::AttributeMap& attrs) \ - : PARENT_CLS(type, inputs, outputs, attrs) {} + : parent_cls(type, inputs, outputs, attrs) {} class NOP : public OperatorBase { public: From 6f4b968f5618adce529d12bd2e3b72d4d1b64f61 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 00:58:02 -0700 Subject: [PATCH 071/170] can train the parameters --- python/paddle/v2/framework/tests/mnist.py | 39 +++++++++++++++++------ 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 32349b8d4d..ededf767bc 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -69,7 +69,7 @@ def init_param(param_name, dims): tensor = var.get_tensor() tensor.set_dims(dims) data = numpy.random.uniform( - low=0.0, high=1.0, size=tensor.shape()).astype("float32") + low=-0.5, high=0.5, size=tensor.shape()).astype("float32") tensor.set(data, place) @@ -109,7 +109,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): bias_name = name + ".b" init_param(param_name=bias_name, dims=[size]) sgd_optimizer( - net=optimize_net, param_name=bias_name, learning_rate=0.01) + net=optimize_net, param_name=bias_name, learning_rate=0.001) bias_out = name + ".rowwise_add.out" scope.new_var(bias_out) rowwise_append_op = Operator( @@ -158,20 +158,33 @@ def print_inputs_outputs(op): def set_cost(): - cost_data = numpy.array(scope.find_var("cross_entropy_1").get_tensor()) + cost_shape = numpy.array(scope.find_var("cross_entropy_3").get_tensor( + )).shape + cost_grad = scope.find_var(grad_var_name("cross_entropy_3")).get_tensor() + cost_grad.set_dims(cost_shape) + cost_grad.alloc_float(place) + cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) + + +def print_cost(): + cost_data = numpy.array(scope.find_var("cross_entropy_3").get_tensor()) print(cost_data.sum() / len(cost_data)) - cost_grad = scope.find_var(grad_var_name("cross_entropy_1")).get_tensor() - cost_grad.set_dims(cost_data.shape) - cost_grad.alloc_float(place) - cost_grad.set(numpy.ones(cost_data.shape).astype("float32"), place) +def error_rate(predict, label): + predict_var = numpy.array(scope.find_var(predict).get_tensor()).argmax( + axis=1) + label = numpy.array(scope.find_var(label).get_tensor()) + error_num = numpy.sum(predict_var != label) + print(error_num / float(len(label))) images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) label = data_layer(name='label', dims=[BATCH_SIZE]) -fc = fc_layer(net=forward_network, input=images, size=10, act="softmax") -cost = cross_entropy_layer(net=forward_network, input=fc, label=label) +fc1 = fc_layer(net=forward_network, input=images, size=100, act="sigmoid") +fc2 = fc_layer(net=forward_network, input=fc1, size=100, act="sigmoid") +predict = 
fc_layer(net=forward_network, input=fc2, size=100, act="softmax") +cost = cross_entropy_layer(net=forward_network, input=predict, label=label) forward_network.complete_add_op(True) backward_net = get_backward_net(forward_network) @@ -192,8 +205,8 @@ reader = paddle.batch( PASS_NUM = 1000 for pass_id in range(PASS_NUM): + batch_id = 0 - print("pass[" + str(pass_id) + "]") for data in reader(): image = numpy.array(map(lambda x: x[0], data)).astype("float32") label = numpy.array(map(lambda x: x[1], data)).astype("int32") @@ -207,3 +220,9 @@ for pass_id in range(PASS_NUM): backward_net.run(scope, dev_ctx) optimize_net.run(scope, dev_ctx) + if batch_id % 100 == 0: + print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]") + print_cost() + error_rate(predict, "label") + + batch_id = batch_id + 1 From 48d87e5e912ad084ccc63dae8649f90a3f0989ba Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 23 Aug 2017 16:47:51 +0800 Subject: [PATCH 072/170] pass test, support input CPU device --- paddle/gserver/layers/Layer.h | 35 +++++--- paddle/gserver/layers/MKLDNNFcLayer.cpp | 108 +++++++++++++++--------- paddle/gserver/layers/MKLDNNLayer.h | 81 +++++++++++++++--- paddle/math/Allocator.h | 6 ++ paddle/math/MKLDNNMatrix.cpp | 71 +++++++++++++--- paddle/math/MKLDNNMatrix.h | 49 ++++++++--- 6 files changed, 258 insertions(+), 92 deletions(-) diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index ec4d093e0c..edef36194a 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -82,6 +82,7 @@ protected: Argument output_; /// Several outputs stored on different devices, used in 'parallel_nn' case, /// and record them by deviceId_. + /// Also used in 'use_mkldnn' case. std::vector outputOtherDevice_; /// If there are several outputs, map them by each name. std::map outputMap_; @@ -177,6 +178,13 @@ protected: return inputLayer.getOutput(deviceId_); } + /** + * Get the argument of input layer with deviceId. + */ + const Argument& getInput(size_t inputIndex, int deviceId) const { + return inputLayers_[inputIndex]->getOutput(deviceId); + } + /** * Get the forward-input value. */ @@ -191,6 +199,13 @@ protected: return inputLayer.getOutput(deviceId_).value; } + /** + * Get the forward-input value with deviceId. + */ + const MatrixPtr& getInputValue(int inputIndex, int deviceId) { + return inputLayers_[inputIndex]->getOutput(deviceId).value; + } + /** * Get the forward-input grad. */ @@ -205,6 +220,13 @@ protected: return inputLayer.getOutput(deviceId_).grad; } + /** + * Get the forward-input grad. + */ + const MatrixPtr& getInputGrad(int inputIndex, int deviceId) { + return inputLayers_[inputIndex]->getOutput(deviceId).grad; + } + /** * Get the forward-input label. 
*/ @@ -326,19 +348,6 @@ public: if (deviceId == getDeviceId()) { return output_; } else { - bool CPU2MKLDNN = - getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE; - bool MKLDNN2CPU = - getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE; - if (CPU2MKLDNN) { - // TODO: do something - return output_; - } else if (MKLDNN2CPU) { - // TODO: do something - return output_; - } - - // TODO: handle mkldnn device or add mkldnn device to other for (size_t i = 0; i < outputOtherDevice_.size(); i++) { if (outputOtherDevice_[i].deviceId == deviceId) { return outputOtherDevice_[i]; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 5463104469..a3291e6a8f 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -97,7 +97,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { } void MKLDNNFcLayer::reshape() { - const Argument& input = getInput(0); + const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int batchSize = input.getBatchSize(); if (bs_ == batchSize) { return; @@ -135,35 +135,43 @@ void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& in = getInputValue(0); const MatrixPtr& wgt = weight_->getW(); const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) { + if (prevIsMKLDNN()) { + const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; - // TODO: change input nchw to nc if available - // inVal_->downSpatial() } else { + CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; + const MatrixPtr& in = getInputValue(0, CPU_DEVICE); inVal_ = MKLDNNMatrix::create( - in, - hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_}, - hasSpatial_ ? format::nchw : format::nc, - engine_); + in, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); } - + inVal_->downSpatial(); wgtVal_ = MKLDNNMatrix::create( - wgt, - hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, - hasSpatial_ ? format::oihw : format::oi, - engine_); + wgt, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + wgtVal_->downSpatial(); biasVal_ = hasBias ? 
MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); - // change original output to mkldnn output + // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); + if (!nextIsMKLDNN()) { + Argument cpuOutput; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + cpuOutput = outputOtherDevice_[i]; + } + } + cpuOutput.setFrameHeight(output_.getFrameHeight()); + cpuOutput.setFrameWidth(output_.getFrameWidth()); + + // fc cpu output value do not need convert + cpuOutput.value = output_.value; + } // create forward handle prop_kind pk = prop_kind::forward; @@ -176,12 +184,13 @@ void MKLDNNFcLayer::resetFwd() { : fc_fwd::desc( pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); } + printValueFormatFlow(); + pipelineFwd_.clear(); pipelineFwd_.push_back(*fwd_); } @@ -197,17 +206,24 @@ void MKLDNNFcLayer::resetBwd() { CHECK(inVal_) << "Should have input value"; const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; - const MatrixPtr& out = output_.grad; - wgtGrad_ = MKLDNNMatrix::create( - wgt, wgtVal_->getDims(), wgtVal_->getFormat(), engine_); - biasGrad_ = - hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; + if (nextIsMKLDNN()) { + // can not directly cast outputgrad to mkldnnmatrix, + // since each layer can not write the inputgrad to mkldnn inputgrad. + // So just create from matrix with outputvalue format. + const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + // TODO: maybe need merge topdiffs + } else { + // TODO: merge topdiffs + const MatrixPtr& out = getOutput(CPU_DEVICE).grad; + // fc do not need to convert from cpu device since output always nc + // only need create from cpu device + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + } - outGrad_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); - // change original output to mkldnn output - // TODO: right? - output_.grad = std::dynamic_pointer_cast(outGrad_); + wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPD()); + biasGrad_ = hasBias ? 
MKLDNNMatrix::create(bias, biasVal_->getPD()) : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, @@ -235,21 +251,38 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - const MatrixPtr& in = getInputGrad(0); - if (in == nullptr) { - return; + if (prevIsMKLDNN()) { + const MatrixPtr& in = getInputGrad(0, MKLDNN_DEVICE); + if (in == nullptr) { + return; + } + if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { + // TODO: many mkldnn bots + // add sum handle + } else { + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); + } + } else { + const MatrixPtr& in = getInputGrad(0, CPU_DEVICE); + if (in == nullptr) { + return; + } + if (getInput(0, CPU_DEVICE).getAllCount() > 1) { + // TODO: many bots + // add sum handle + } else { + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); + } } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - // TODO: check right, just from ingrad? - inGrad_ = - MKLDNNMatrix::create(in, inVal_->getDims(), inVal_->getFormat(), engine_); - CHECK(wgtVal_) << "Should have weight memory"; bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + printGradFormatFlow(); pipelineBwd_.push_back(*bwdData_); } @@ -259,11 +292,7 @@ void MKLDNNFcLayer::forward(PassType passType) { { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - - // update input data - // since it might be changed if this is after data layer - real* iData = getInputValue(0)->getData(); - inVal_->updateData(iData); + syncInputValue(); // just submit forward pipeline stream_->submit(pipelineFwd_); @@ -285,10 +314,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) { REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); resetBwd(); - // update diff - real* oDiff = getOutputGrad()->getData(); - outGrad_->updateData(oDiff); - + syncOutputGrad(); // just sumbmit backward pipeline stream_->submit(pipelineBwd_); } diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index fbd62d9aaa..3dd17a36ff 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -125,23 +125,80 @@ public: << ", oh: " << oh_ << ", ow: " << ow_; } - // TODO(TJ): move to MkldnnMatrix - // create memory desc - inline mkldnn::memory::desc createMD( - mkldnn::memory::dims dims, - mkldnn::memory::format fmt, - mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { - // TODO(TJ): isFmtSuppoted(fmt) - return mkldnn::memory::desc(dims, type, fmt); + /** + * Print the mkldnn memory format flow of value + */ + virtual void printValueFormatFlow() { + if (inVal_ && outVal_) { + VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() + << " >>> " << outVal_->getFormat(); + } } - void resetMKLDNNOutput(size_t height, size_t width) { - Layer::resetOutput(height, width); - // get valu and grad, use mkldnn matrix instaed - // output_.value; + /** + * Print the mkldnn memory format flow of grad + */ + virtual void printGradFormatFlow() { + if (inGrad_ && outGrad_) { + VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() + << " <<< " << outGrad_->getFormat(); + } } protected: + /** + * If next layer only has MKLDNN type. + * Otherwise, only support otherdevice CPU device. 
+ */ + bool nextIsMKLDNN() { + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) + << "Only support other device is CPU yet"; + } + return outputOtherDevice_.size() == 0; + } + + /** + * Is previous layer MKLDNN type. + * Otherwise, only support otherdevice CPU device. + */ + bool prevIsMKLDNN(int index = 0) { + int prevDevice = getPrev(index)->getDeviceId(); + if (prevDevice == MKLDNN_DEVICE) { + return true; + } else { + // do not support GPU yet + CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet"; + return false; + } + } + + /** + * Sync input value data + */ + void syncInputValue() { + if (prevIsMKLDNN()) { + return; + } + real* iData = getInputValue(0, CPU_DEVICE)->getData(); + // update input data + // since it might be changed if this is after data layer + inVal_->updateData(iData); + } + + /** + * Sync output grad data + */ + void syncOutputGrad() { + if (nextIsMKLDNN()) { + return; + } + + // update diff + real* oDiff = getOutput(CPU_DEVICE).grad->getData(); + outGrad_->updateData(oDiff); + } + /** * Set deviceId of this layer. */ diff --git a/paddle/math/Allocator.h b/paddle/math/Allocator.h index 666a8b8368..94ef561f06 100644 --- a/paddle/math/Allocator.h +++ b/paddle/math/Allocator.h @@ -48,7 +48,13 @@ public: */ virtual void* alloc(size_t size) { void* ptr; +#ifdef PADDLE_USE_MKLDNN + // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp + // memory alignment + CHECK_EQ(posix_memalign(&ptr, 4096ul, size), 0); +#else CHECK_EQ(posix_memalign(&ptr, 32ul, size), 0); +#endif CHECK(ptr) << "Fail to allocate CPU memory: size=" << size; return ptr; } diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 44fc54278c..24d54ec0f7 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -18,29 +18,74 @@ using namespace mkldnn; // NOLINT namespace paddle { -MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m, - memory::dims dims, - memory::format fmt, - engine& eg, - mkldnn::memory::data_type dtype) { - CpuMatrixPtr cpuM = std::dynamic_pointer_cast(m); - CHECK(cpuM) << "Only support create from CPU matrix yet"; - - size_t ndims = dims.size(); +MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { + memory::desc md = pd.desc(); + size_t ndims = md.data.ndims; + int* dims = md.data.dims; CHECK(ndims > 0) << "Input dims should not be empty"; - size_t cnt = 1; + size_t cnts = 1; for (size_t i = 0; i < ndims; ++i) { - cnt *= dims[i]; + cnts *= dims[i]; } - CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match"; + if (m == nullptr) { + size_t height = dims[0]; + size_t width = cnts / dims[0]; + // LOG(INFO) << height << "," << width; + m = Matrix::create(height, width, false, false); + } + + CHECK(m) << " Matrix should not be empty"; + CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast(m); + CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; + + CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match"; size_t width = m->getWidth(); size_t height = m->getHeight(); real* data = m->getData(); + return std::make_shared(data, height, width, pd); +} +MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, + memory::dims dims, + memory::format fmt, + engine& eg, + mkldnn::memory::data_type dtype) { memory::desc md = memory::desc(dims, dtype, fmt); memory::primitive_desc pd = memory::primitive_desc(md, eg); - return std::make_shared(data, height, width, pd); + return create(m, pd); +} + +void 
MKLDNNMatrix::downSpatial() { + int fmt = getFormat(); + if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) { + // only support nchw and oihw yet, later can support more like nhwc, ihwo + return; + } + + memory::dims srcDims = getDims(); + const int H = 2, W = 3; + if (srcDims[H] != 1 || srcDims[W] != 1) { + // can not down spatial + return; + } + + memory::dims dstDims = memory::dims{srcDims[0], srcDims[1]}; + memory::format dstFmt; + switch (fmt) { + case memory::format::nchw: + dstFmt = memory::format::nc; + break; + case memory::format::oihw: + dstFmt = memory::format::oi; + break; + default: + LOG(FATAL) << "unsupported format"; + } + memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); + memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); + void* data = getData(); + memory(pd, data); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 54c0a1fdcb..05adc867c2 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -39,20 +39,37 @@ public: mkldnn::memory::primitive_desc pd) : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} - MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd) - : CpuMatrix(height, width, false), mkldnn::memory(pd) { - set_data_handle(CpuMatrix::getData()); - } - ~MKLDNNMatrix() {} + /** + * Create MKLDNNMatrix from a MatrixPtr and memory primitive_desc + */ + static MKLDNNMatrixPtr create(MatrixPtr m, mkldnn::memory::primitive_desc pd); + + /** + * Create MKLDNNMatrix from a MatrixPtr and memory details info + */ static MKLDNNMatrixPtr create( - const MatrixPtr& m, + MatrixPtr m, mkldnn::memory::dims dims, mkldnn::memory::format fmt, mkldnn::engine& eg, mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); +public: + /** + * Dimensionality reduction. + * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1 + */ + void downSpatial(); + + /** + * Update the memory data handle. + * Caution: This will not check the buffer size of the data, + * it should be coverd by user. + */ + void updateData(void* data) { set_data_handle(data); } + /** * Get primitive descriptor. */ @@ -64,12 +81,13 @@ public: mkldnn::memory::desc getMD() { return getPD().desc(); } /** - * Get dims. + * Get dimensions. */ mkldnn::memory::dims getDims() { + mkldnn::memory::desc md = getMD(); + const int* src = md.data.dims; + int ndims = md.data.ndims; mkldnn::memory::dims dst; - int* src = getMD().data.dims; - int ndims = getMD().data.ndims; dst.resize(ndims); for (int i = 0; i < ndims; ++i) { dst[i] = src[i]; @@ -85,11 +103,16 @@ public: } /** - * Update the memory data handle. - * Caution: This will not check the buffer size of the data, - * it should be coverd by user. + * Get memory data type. */ - void updateData(void* data) { set_data_handle(data); } + mkldnn::memory::data_type getDtype() { + return (mkldnn::memory::data_type)(getMD().data.data_type); + } + + /** + * Get engine. 
+ */ + mkldnn::engine getEngine() { return getPD().get_engine(); } }; } // namespace paddle From 780c8d969e0d2d220df19a672c141ff7c44f53d2 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 23 Aug 2017 17:03:16 +0800 Subject: [PATCH 073/170] make downSpatial work, and remove hasSpatial_ --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 4 ---- paddle/gserver/layers/MKLDNNFcLayer.h | 5 +---- paddle/math/MKLDNNMatrix.cpp | 7 ++++++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a3291e6a8f..a5555c4618 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -111,10 +111,6 @@ void MKLDNNFcLayer::reshape() { if (iw_ == 0) { iw_ = 1; } - hasSpatial_ = true; - if (ih_ == 1 && iw_ == 1) { - hasSpatial_ = false; - } CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index 7954852a23..e2657a8d5e 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -32,16 +32,13 @@ protected: // if has already init the weight bool hasInitedWgt_; - // if input layer has image size info (ih>1 && iw>1) - bool hasSpatial_; - // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; public: explicit MKLDNNFcLayer(const LayerConfig& config) - : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + : MKLDNNLayer(config), hasInitedWgt_(false) {} ~MKLDNNFcLayer() {} diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 24d54ec0f7..94df9c1550 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -85,7 +85,12 @@ void MKLDNNMatrix::downSpatial() { memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); void* data = getData(); - memory(pd, data); + mkldnn_primitive_t result; + mkldnn::error::wrap_c_api( + mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), + "could not create a memory primitive"); + reset(result); + set_data_handle(data); } } // namespace paddle From bfcaf880d0eed61291f0483091382131ef6cde88 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 23 Aug 2017 18:48:05 +0800 Subject: [PATCH 074/170] Move pybind from package paddle/framework into paddle/pybind. 
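Stepping back to the two MKL-DNN patches above: MKLDNNMatrix::downSpatial only collapses an nchw (or oihw) memory descriptor to nc (or oi) when both trailing spatial dimensions are exactly 1, and leaves anything else untouched. A plain-Python sketch of just that shape rule, as an illustration of the logic rather than of the actual mkldnn API (the function name is invented for the example):

    def down_spatial(dims, fmt):
        # dims is [n, c, h, w] for "nchw" or [o, i, h, w] for "oihw";
        # collapse to 2-D only when both spatial dims are exactly 1.
        if fmt not in ("nchw", "oihw"):
            return dims, fmt
        lead, second, h, w = dims
        if h != 1 or w != 1:
            return dims, fmt
        return [lead, second], ("nc" if fmt == "nchw" else "oi")

    # e.g. a fully connected weight of shape [oc, ic, 1, 1] becomes [oc, ic]:
    assert down_spatial([32, 64, 1, 1], "oihw") == ([32, 64], "oi")
    # but a real 4-D activation keeps its layout:
    assert down_spatial([8, 3, 28, 28], "nchw") == ([8, 3, 28, 28], "nchw")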
--- paddle/CMakeLists.txt | 1 + paddle/framework/CMakeLists.txt | 20 -------------------- paddle/pybind/CMakeLists.txt | 19 +++++++++++++++++++ paddle/{framework => pybind}/pybind.cc | 18 ++++++++++-------- paddle/{framework => pybind}/tensor_py.h | 11 +++++++---- 5 files changed, 37 insertions(+), 32 deletions(-) create mode 100644 paddle/pybind/CMakeLists.txt rename paddle/{framework => pybind}/pybind.cc (95%) rename paddle/{framework => pybind}/tensor_py.h (92%) diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index cf61a243e9..ec866b2907 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -15,6 +15,7 @@ if(Boost_FOUND) add_subdirectory(platform) add_subdirectory(framework) add_subdirectory(operators) + add_subdirectory(pybind) endif() if(WITH_C_API) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index ad219887d6..c0838d9b75 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -39,23 +39,3 @@ add_custom_command(TARGET framework_py_proto POST_BUILD cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) - -if(WITH_PYTHON) -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - sgd_op - gather_op - add_op - mul_op - rowwise_add_op - sigmoid_op - softmax_op - mean_op - cross_entropy_op - recurrent_op - uniform_random_op - gaussian_random_op - fill_zeros_like_op - scale_op) -endif(WITH_PYTHON) diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt new file mode 100644 index 0000000000..10be83efc6 --- /dev/null +++ b/paddle/pybind/CMakeLists.txt @@ -0,0 +1,19 @@ +if(WITH_PYTHON) +cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python backward + sgd_op + gather_op + add_op + mul_op + rowwise_add_op + sigmoid_op + softmax_op + mean_op + cross_entropy_op + recurrent_op + uniform_random_op + gaussian_random_op + fill_zeros_like_op + scale_op) +endif(WITH_PYTHON) diff --git a/paddle/framework/pybind.cc b/paddle/pybind/pybind.cc similarity index 95% rename from paddle/framework/pybind.cc rename to paddle/pybind/pybind.cc index b5ae81ebca..cdf739c3a2 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -18,11 +18,11 @@ limitations under the License. */ #include "paddle/framework/backward.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/tensor_py.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/pybind/tensor_py.h" #include "paddle/string/to_string.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" @@ -134,7 +134,8 @@ All parameter, weight, gradient are variables in Paddle. py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def(py::init<>()) - .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, + .def("new_scope", + [](Scope &self) -> Scope * { return &self.NewScope(); }, py::return_value_policy::reference) .def("drop_kids", &Scope::DropKids); @@ -222,8 +223,10 @@ All parameter, weight, gradient are variables in Paddle. 
retv->SetType("plain_net"); return retv; }) - .def("append_op", [](operators::NetOp &self, - const OperatorBase &op) { self.AppendOp(op); }) + .def("append_op", + [](operators::NetOp &self, const OperatorBase &op) { + self.AppendOp(op); + }) .def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", [](std::shared_ptr &self) { self->CompleteAddOp(); @@ -243,10 +246,9 @@ All parameter, weight, gradient are variables in Paddle. auto rnn_op = OpRegistry::CreateOp(desc); return static_cast(rnn_op.release()); }) - .def("set_stepnet", [](operators::RecurrentOp &self, - const operators::NetOp &net) -> void { - self.set_stepnet(net.Clone()); - }); + .def("set_stepnet", + [](operators::RecurrentOp &self, const operators::NetOp &net) + -> void { self.set_stepnet(net.Clone()); }); m.def("unique_integer", UniqueIntegerGenerator); diff --git a/paddle/framework/tensor_py.h b/paddle/pybind/tensor_py.h similarity index 92% rename from paddle/framework/tensor_py.h rename to paddle/pybind/tensor_py.h index 4e1ab77b15..39ba60b4dc 100644 --- a/paddle/framework/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -63,8 +63,11 @@ struct CastToPyBufferImpl { } return py::buffer_info( dst_tensor.mutable_data(dst_tensor.holder_->place()), - sizeof(CUR_TYPE), py::format_descriptor::format(), - (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides); + sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(dst_tensor.dims()), + dims_outside, + strides); } else { constexpr bool less = I + 1 < std::tuple_size>::value; return CastToPyBufferImpl()(tensor); @@ -107,8 +110,8 @@ void PyCUDATensorSetFromArray( self.Resize(framework::make_ddim(dims)); auto *dst = self.mutable_data(place); - paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(), - cudaMemcpyHostToDevice); + paddle::platform::GpuMemcpySync( + dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); } #endif From e3342ff8e79fbe1cacb8fa5a66cb9c69cba1eeb9 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 23 Aug 2017 19:30:46 +0800 Subject: [PATCH 075/170] Fix android build error. 
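The tensor_py.h relocation above is mechanical, but it is the code that backs the numpy round-trip the mnist.py test from patch 071 relies on. A minimal sketch of that round-trip; the import path and CPUPlace constructor are assumptions inferred from that test, while the scope/tensor calls are the ones the test itself uses:

    import numpy
    import paddle.v2.framework.core as core  # assumed import path for the pybind module

    scope = core.Scope()
    place = core.CPUPlace()  # assumed; mnist.py keeps the place in a module-level global

    # Create a variable, shape its tensor, and fill it from numpy,
    # the same pattern init_param() uses in the mnist.py test.
    var = scope.new_var("w")
    tensor = var.get_tensor()
    tensor.set_dims([784, 100])
    data = numpy.random.uniform(
        low=-0.5, high=0.5, size=(784, 100)).astype("float32")
    tensor.set(data, place)

    # Reading it back goes through the buffer protocol implemented in tensor_py.h.
    w = numpy.array(scope.find_var("w").get_tensor())
    print(w.shape)  # (784, 100)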
--- paddle/cuda/include/stub/hl_cuda_cudnn_stub.h | 11 ++++++++--- paddle/cuda/src/hl_cuda_cudnn.cc | 3 ++- paddle/gserver/tests/test_LayerGrad.cpp | 6 ++++-- python/paddle/trainer/config_parser.py | 6 +++--- python/paddle/trainer_config_helpers/layers.py | 7 +++---- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h index abd0d6b099..3afcc6fa85 100644 --- a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h +++ b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h @@ -78,7 +78,9 @@ inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width) {} + int stride_width, + int dilation_h, + int dilation_w) {} inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, hl_tensor_descriptor image, @@ -86,7 +88,9 @@ inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width) {} + int stride_width, + int dilation_h, + int dilation_w) {} inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {} @@ -99,7 +103,8 @@ inline void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes) {} + size_t* bwdFilterLimitBytes, + bool useDilation) {} inline void hl_convolution_forward(hl_tensor_descriptor input, real* input_data, diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index f55fa523e1..f38ef69255 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -640,7 +640,8 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, #else if (dilation_h > 1 || dilation_w > 1) { LOG(FATAL) - << "Current cudnn version does't support for dilation convolution."; + << "Current cuDNN version does't support for dilation convolution. " + << "The dilation convolution requires cuDNN >= v6.0."; } CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 9348c47bd4..9946f76664 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#ifndef PADDLE_ONLY_CPU #include +#endif #include #include #include @@ -262,8 +264,8 @@ TEST(Projection, conv) { testProjectionConv(1, false); testProjectionConv(3, false); /// test ConvTransProjection - /// testProjectionConv(1, true); - /// testProjectionConv(3, true); + testProjectionConv(1, true); + testProjectionConv(3, true); } #endif diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 33a20afb18..ddfd615d84 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -862,7 +862,6 @@ class Conv(Cfg): filter_size, channels, padding=None, - dilation=None, stride=None, groups=None, filter_channels=None, @@ -871,8 +870,9 @@ class Conv(Cfg): caffe_mode=True, filter_size_y=None, padding_y=None, - dilation_y=None, - stride_y=None): + stride_y=None, + dilation=None, + dilation_y=None): self.add_keys(locals()) if filter_size_y is None: self.filter_size_y = filter_size diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 74b88cd4f8..9876798558 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2340,7 +2340,7 @@ def img_conv_layer(input, groups=1, stride=1, padding=0, - dilation=0, + dilation=1, bias_attr=None, param_attr=None, shared_biases=True, @@ -2472,9 +2472,6 @@ def img_conv_layer(input, else: dilation_y = dilation - if dilation > 1 or dilation_y > 1: - assert layer_type in ["cudnn_conv", "cudnn_convt"] - if param_attr.attr.get('initial_smart'): # special initial for conv layers. init_w = (2.0 / (filter_size**2 * num_channels))**0.5 @@ -2484,6 +2481,8 @@ def img_conv_layer(input, param_attr.attr["initial_smart"] = False if layer_type: + if dilation > 1 or dilation_y > 1: + assert layer_type in ["cudnn_conv", "cudnn_convt"] if trans: assert layer_type in ["exconvt", "cudnn_convt"] else: From 5e59ca7ccc8232b2028cfc8b4cffe19ffc73ba18 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 10:40:46 +0800 Subject: [PATCH 076/170] fix config helper. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 10 ++ paddle/gserver/layers/CrossEntropyOverBeam.h | 16 ++- .../tests/test_CrossEntropyOverBeamGrad.cpp | 22 ++- python/paddle/trainer/config_parser.py | 12 +- .../paddle/trainer_config_helpers/layers.py | 129 +++++++++++++++--- .../test_cross_entropy_over_beam.protostr | 17 ++- .../configs/test_cross_entropy_over_beam.py | 18 ++- 7 files changed, 162 insertions(+), 62 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 09258fb305..f7736f0ce9 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -161,7 +161,17 @@ real CostForOneSequence::forward() { } void CostForOneSequence::backward() { + /* + * when softmax layer is the output layer, and it is combined with + * cross-entropy as cost. The derivate with regard to softmax's input + * is simply: + * + * grad_i = softmax_out_i - target_i, + * + * and here hard label is used. 
+ */ softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.; + MatrixPtr tmp = Matrix::create( softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h index 96a5df7dfb..5d0cffee3c 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.h +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -19,8 +19,8 @@ limitations under the License. */ namespace paddle { +/* This struct stores the beams in all search steps for a single sequence. */ struct BeamExpansion { - // store the entire beam expansion for a single sequence std::vector scores; std::vector seqInfo; @@ -111,8 +111,11 @@ private: size_t batchSize_; size_t beamSize_; - // Currently, this layer only works on CPU, if its inputs is on GPU, - // copy them to CPU memory. + /* + * the process of constructing beams is not friendly to GPU, currently, this + * layer only runs on CPU, if any of its inputs is on GPU memory, then copy + * it to CPU memory. + */ std::vector candidateScores_; std::vector candidateScoreGrad_; std::vector candidateInBeam_; @@ -120,9 +123,12 @@ private: std::vector goldSequence_; std::vector> beamSplitPos_; - // split entire bath of beams into beam per sequnence. + /* + * split entire bath of beams into beam per sequnence and store the result + * into this member. + */ std::vector beamPerSeq_; - // beamCosts_ is used to propagate error in one sequence. + /* beamCosts_ is used to propagate error in one sequence. */ std::vector beamCosts_; }; diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index 506a4281df..538d18cdc3 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -28,16 +28,10 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -// const size_t MAX_SEQ_NUM = 5; -// const size_t MAX_SEQ_LEN = 10; -// const size_t MAX_BEAM_SIZE = 3; - const size_t MAX_SEQ_NUM = 23; const size_t MAX_SEQ_LEN = 50; const size_t MAX_BEAM_SIZE = 27; -// const size_t SEED = 1503391792; -// const size_t SEED = 1; const size_t SEED = (size_t)(time(NULL)); struct SingleBeamExpansion { @@ -176,10 +170,12 @@ void genGroundTruth(vector& beamExpansions, beam.resetGroundTruth(seqNum); for (size_t i = 0; i < seqNum; ++i) { if (randFloat() > 0.5) { - // force the randomly generated label falls in the beam by chance 0.5. - // otherwise, when sequence length is relatively long and beam size is - // relatively small, the gold sequences falls off the beam at in - // the first search. + /* + * force the randomly generated label falls in the beam by chance 0.5. + * otherwise, when sequence length is relatively long and beam size is + * relatively small, the gold sequences falls off the beam at in the + * first search. + */ real* begPos = beam.selectedIndices.data() + i * beamSize; beam.colIdxInBeam[i] = rand() % count_if(begPos, begPos + beamSize, [](const real& val) { @@ -222,9 +218,7 @@ void genGroundTruth(vector& beamExpansions, if (randFloat() > 0.5) { // force the randomly generated label falls in the beam by chance 0.5. - // otherwise, when sequence length is relatively long and beam size is - // relatively small, the gold sequences falls off the beam at in - // the first search. 
+ real* start = curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; int n = rand() % count_if(start, start + beamSize, [](const real& val) { @@ -339,7 +333,7 @@ TEST(Layer, CrossEntropyOverBeam) { const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE; LOG(INFO) << "beamSize = " << beamSize; - // TODO(caoying): test with more beam expansions. + // TODO(caoying): test with random beam expansions. const size_t expansionCount = 3; vector beams; genRandomBeamExpansion(expansionCount, beamSize, beams); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 7707ece819..579713546f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1605,16 +1605,16 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('cross_entropy_over_beam') class CrossEntropyOverBeamLayer(LayerBase): def __init__(self, name, inputs, **xargs): - config_assert(len(inputs) % 3 == 0, "Error input numbers.") + config_assert(len(inputs) % 3 == 0, "Error input number.") super(CrossEntropyOverBeamLayer, self).__init__( name, 'cross_entropy_over_beam', 0, inputs, **xargs) input_num = len(inputs) / 3 for i in range(input_num): - input_layer = self.get_input_layer(i * 2) - config_assert( - input_layer.size == 1, "Inputs for this layer are made up of " - "several pairs and the first one in a pair is scores for " - "all the candidates, so its size should be equal to 1.") + input_layer = self.get_input_layer(i * 3) + config_assert(input_layer.size == 1, ( + "Inputs for this layer are made up of " + "several triples, in which the first one is scores over " + "all candidate paths, whose size should be equal to 1.")) @config_layer('fc') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b027f84b5d..053c92d005 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -103,6 +103,7 @@ __all__ = [ 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'BeamInput', 'cross_entropy_over_beam', 'multi_binary_label_cross_entropy', 'sum_cost', @@ -5681,10 +5682,10 @@ def multi_binary_label_cross_entropy(input, if input.activation is None or \ not isinstance(input.activation, SigmoidActivation): - logger.log( - logging.WARN, - "%s is not recommend for multi_binary_label_cross_entropy's activation, " - "maybe the sigmoid is better" % repr(input.activation)) + logger.log(logging.WARN, + ("%s is not a recommended activation for " + "multi_binary_label_cross_entropy, sigmoid is better") % + repr(input.activation)) Layer( name=name, @@ -5699,26 +5700,110 @@ def multi_binary_label_cross_entropy(input, size=1) +class BeamInput(object): + """ + Define the input for cross_entropy_over_beam layer. + + A beam is made up of a triple: the first one is scores over all + candidates; the second one is indices of top k selected candidates; the + third one is the index of ground truth, which is also always called + gold. 
+ """ + + def __init__(self, candidate_scores, selected_candidates, gold): + assert isinstance(candidate_scores, LayerOutput) + self.candidate_scores = candidate_scores + assert candidate_scores.size == 1 + + assert isinstance(selected_candidates, LayerOutput) + self.selected_candidates = selected_candidates + + assert isinstance(gold, LayerOutput) + self.gold = gold + + @wrap_name_default() @layer_support() -def cross_entropy_over_beam(input, label, name=None, coeff=1.0, weight=None): - """ - TODO(caoying) add comments. +def cross_entropy_over_beam(input, name=None): """ + This layer is used in learning to search models, which is to solve complex + joint prediction problems based on learning to search through a + problem-defined search space. - assert len(input) / 2 == len(label), "Error input numbers." - for i in range(0, len(input), 2): - assert (input[i].size == 1), ( - "Inputs for this layer are made up of " - "several pairs and the first one in a pair is scores for " - "all the candidates, so its size should be equal to 1.") + Specifically, the learning to search process for this layer begins with + searching a target sequence from a nested sequence. In the first search + step, top beam size sequences with highest scores, indices of these top k + sequences in the original nested sequence, and the ground truth (also + called gold) altogether (a triple) make up of the first beam. - ipts, parents = __cost_input__(input, label, weight) - Layer( - name=name, - type=LayerType.CROSS_ENTROPY_OVER_BEAM, - inputs=ipts, - coeff=coeff) + Then, several special positions, for example, start and end positions + that define meaningful segments are searched. In these searches, top k + positions with highest scores are selected, and then sequence, starting + from the selected starts till ends of the sequences (or a fixed position) + are taken to search next. + + We call the possible top k results returned in one search the beam. This + search process can be repeated for pre-defined turns and leads to several + beam expansions. + + Finally, the layer cross_entropy_over_beam takes all the beam expansions + which contain several candidate targets found along the multi-step search. + cross_entropy_over_beam calculates cross entropy over the expanded beams + which all the candidates in the beam as the normalized factor. + + Note that, if gold falls off the beam at search step t, then the cost is + calculated over the beam at step t. + + This cost layer always works together with kmax_sequence_score_layer, + sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a + sub-search space. + + + The example usage is: + + .. code-block:: python + + cost = cross_entropy_over_beam(input=[ + BeamInput( + candidate_scores=beam1_candidates, + selected_candidates=beam1_topk, + gold=gold1), + BeamInput( + candidate_scores=beam2_candidates, + selected_candidates=beam2_topk, + gold=gold2), + ]) + + + :param input: input beams for this layer. + :type input: BeamInput + :param name: input beams for this layer. + :type name: basestring + :return: LayerOutput object. 
+ :rtype: LayerOutput + """ + + if isinstance(input, BeamInput): + input = [input] + else: + assert isinstance(input, list), ( + 'input for cross_entropy_over_beam shold be a python list ' + 'of BeamInput object.') + for ipt in input: + assert isinstance(ipt, BeamInput), ( + 'input for cross_entropy_over_beam ' + 'should be a BeamInput object.') + + ipts = [] + parents = [] + for beam in input: + parents += [beam.candidate_scores, beam.selected_candidates, beam.gold] + ipts += [ + beam.candidate_scores.name, beam.selected_candidates.name, + beam.gold.name + ] + + Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts) return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) @@ -6247,11 +6332,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): @wrap_bias_attr_default() def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): """ - A layer applies a linear transformation to each element in each row of - the input matrix. For each element, the layer first re-scale it and then + A layer applies a linear transformation to each element in each row of + the input matrix. For each element, the layer first re-scale it and then adds a bias to it. - This layer is very like the SlopeInterceptLayer, except the scale and + This layer is very like the SlopeInterceptLayer, except the scale and bias are trainable. .. math:: diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr index e44478ec2b..c43fc48e22 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -114,27 +114,26 @@ layers { input_layer_name: "__kmax_sequence_score_layer_0__" } inputs { - input_layer_name: "__fc_layer_0__" + input_layer_name: "sentences_ids" } inputs { - input_layer_name: "__kmax_sequence_score_layer_1__" + input_layer_name: "__fc_layer_0__" } inputs { - input_layer_name: "__fc_layer_1__" + input_layer_name: "__kmax_sequence_score_layer_1__" } inputs { - input_layer_name: "__kmax_sequence_score_layer_2__" + input_layer_name: "start_ids" } inputs { - input_layer_name: "sentences_ids" + input_layer_name: "__fc_layer_1__" } inputs { - input_layer_name: "start_ids" + input_layer_name: "__kmax_sequence_score_layer_2__" } inputs { input_layer_name: "end_ids" } - coeff: 1.0 } parameters { name: "___fc_layer_0__.w0" @@ -177,8 +176,8 @@ parameters { initial_smart: false } input_layer_names: "sentence_scores" -input_layer_names: "sentence_states" input_layer_names: "sentences_ids" +input_layer_names: "sentence_states" input_layer_names: "start_ids" input_layer_names: "end_ids" output_layer_names: "__cross_entropy_over_beam_0__" @@ -198,8 +197,8 @@ sub_models { layer_names: "end_ids" layer_names: "__cross_entropy_over_beam_0__" input_layer_names: "sentence_scores" - input_layer_names: "sentence_states" input_layer_names: "sentences_ids" + input_layer_names: "sentence_states" input_layer_names: "start_ids" input_layer_names: "end_ids" output_layer_names: "__cross_entropy_over_beam_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py index edc2d32fca..240e703dc9 100644 --- 
a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -29,11 +29,17 @@ topk_end_pos_ids = kmax_sequence_score_layer( sentence_idx = data_layer(name="sentences_ids", size=1) start_idx = data_layer(name="start_ids", size=1) end_idx = data_layer(name="end_ids", size=1) -cost = cross_entropy_over_beam( - input=[ - sentence_scores, topk_sentence_ids, start_pos_scores, - topk_start_pos_ids, end_pos_scores, topk_end_pos_ids - ], - label=[sentence_idx, start_idx, end_idx]) +cost = cross_entropy_over_beam(input=[ + BeamInput( + candidate_scores=sentence_scores, + selected_candidates=topk_sentence_ids, + gold=sentence_idx), BeamInput( + candidate_scores=start_pos_scores, + selected_candidates=topk_start_pos_ids, + gold=start_idx), BeamInput( + candidate_scores=end_pos_scores, + selected_candidates=topk_end_pos_ids, + gold=end_idx) +]) outputs(cost) From 76677f25774a84d9ced011be02e62ae15b03506c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 09:12:34 -0700 Subject: [PATCH 077/170] add test --- python/paddle/v2/framework/tests/mnist.py | 36 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index ededf767bc..e47de2436f 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -52,7 +52,7 @@ def grad_var_name(var_name): return var_name + "@GRAD" -def sgd_optimizer(net, param_name, learning_rate=0.01): +def sgd_optimizer(net, param_name, learning_rate=0.005): grad_name = grad_var_name(param_name) optimize_op = Operator( "sgd", @@ -166,9 +166,9 @@ def set_cost(): cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) -def print_cost(): +def mean_cost(): cost_data = numpy.array(scope.find_var("cross_entropy_3").get_tensor()) - print(cost_data.sum() / len(cost_data)) + return cost_data.sum() / len(cost_data) def error_rate(predict, label): @@ -176,7 +176,7 @@ def error_rate(predict, label): axis=1) label = numpy.array(scope.find_var(label).get_tensor()) error_num = numpy.sum(predict_var != label) - print(error_num / float(len(label))) + return error_num / float(len(label)) images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) @@ -198,16 +198,35 @@ print_inputs_outputs(forward_network) print_inputs_outputs(backward_net) print_inputs_outputs(optimize_net) -reader = paddle.batch( +train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), batch_size=BATCH_SIZE) + +def test(): + test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + cost = [] + error = [] + for data in test_reader(): + image = numpy.array(map(lambda x: x[0], data)).astype("float32") + label = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data("pixel", image) + feed_data("label", label) + + forward_network.infer_shape(scope) + forward_network.run(scope, dev_ctx) + cost.append(mean_cost()) + error.append(error_rate(predict, "label")) + print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( + sum(error) / float(len(error)))) + + PASS_NUM = 1000 for pass_id in range(PASS_NUM): batch_id = 0 - for data in reader(): + for data in train_reader(): image = numpy.array(map(lambda x: x[0], data)).astype("float32") label = numpy.array(map(lambda x: x[1], data)).astype("int32") feed_data("pixel", image) @@ -222,7 +241,8 @@ for pass_id in 
range(PASS_NUM): optimize_net.run(scope, dev_ctx) if batch_id % 100 == 0: print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]") - print_cost() - error_rate(predict, "label") + test() + # print(mean_cost()) + # print(error_rate(predict, "label")) batch_id = batch_id + 1 From cf515e4a72f4b02fbbbfdbd79c3b66b1be694e7b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 09:39:47 -0700 Subject: [PATCH 078/170] optimize code and name --- python/paddle/v2/framework/tests/mnist.py | 56 +++++++++++------------ 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index e47de2436f..886e99610d 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -134,7 +134,7 @@ def cross_entropy_layer(net, input, label): return cost_name -def get_backward_net(forward_net): +def create_backward_net(forward_net): net = core.Operator.backward(forward_net, set()) for input in net.inputs()["all"]: var = scope.new_var(input) @@ -145,29 +145,29 @@ def get_backward_net(forward_net): return net -def print_inputs_outputs(op): +def debug_print_op(op): print("===============" + op.type() + "==============") print("***inputs:***") for input in op.inputs()["all"]: print input, scope.find_var(input).get_tensor().get_dims() - print("***outputs:***") + print("\n***outputs:***") for output in op.outputs()["all"]: print output, scope.find_var(output).get_tensor().get_dims() print("") print("") -def set_cost(): - cost_shape = numpy.array(scope.find_var("cross_entropy_3").get_tensor( - )).shape - cost_grad = scope.find_var(grad_var_name("cross_entropy_3")).get_tensor() +def set_cost(cost): + cost_shape = numpy.array(scope.find_var(cost).get_tensor()).shape + cost_grad = \ + scope.find_var(grad_var_name(cost)).get_tensor() cost_grad.set_dims(cost_shape) cost_grad.alloc_float(place) cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) -def mean_cost(): - cost_data = numpy.array(scope.find_var("cross_entropy_3").get_tensor()) +def mean_cost(cost): + cost_data = numpy.array(scope.find_var(cost).get_tensor()) return cost_data.sum() / len(cost_data) @@ -180,23 +180,23 @@ def error_rate(predict, label): images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) -label = data_layer(name='label', dims=[BATCH_SIZE]) +labels = data_layer(name='label', dims=[BATCH_SIZE]) fc1 = fc_layer(net=forward_network, input=images, size=100, act="sigmoid") fc2 = fc_layer(net=forward_network, input=fc1, size=100, act="sigmoid") predict = fc_layer(net=forward_network, input=fc2, size=100, act="softmax") -cost = cross_entropy_layer(net=forward_network, input=predict, label=label) +cost = cross_entropy_layer(net=forward_network, input=predict, label=labels) forward_network.complete_add_op(True) -backward_net = get_backward_net(forward_network) +backward_net = create_backward_net(forward_network) optimize_net.complete_add_op(True) print(forward_network) print(backward_net) print(optimize_net) -print_inputs_outputs(forward_network) -print_inputs_outputs(backward_net) -print_inputs_outputs(optimize_net) +debug_print_op(forward_network) +debug_print_op(backward_net) +debug_print_op(optimize_net) train_reader = paddle.batch( paddle.reader.shuffle( @@ -204,19 +204,19 @@ train_reader = paddle.batch( batch_size=BATCH_SIZE) -def test(): +def test(cost_name): test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) cost = [] error = [] for data in test_reader(): - image = 
numpy.array(map(lambda x: x[0], data)).astype("float32") - label = numpy.array(map(lambda x: x[1], data)).astype("int32") - feed_data("pixel", image) - feed_data("label", label) + image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") + label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data(images, image_data) + feed_data(labels, label_data) forward_network.infer_shape(scope) forward_network.run(scope, dev_ctx) - cost.append(mean_cost()) + cost.append(mean_cost(cost_name)) error.append(error_rate(predict, "label")) print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( sum(error) / float(len(error)))) @@ -227,22 +227,20 @@ for pass_id in range(PASS_NUM): batch_id = 0 for data in train_reader(): - image = numpy.array(map(lambda x: x[0], data)).astype("float32") - label = numpy.array(map(lambda x: x[1], data)).astype("int32") - feed_data("pixel", image) - feed_data("label", label) + image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") + label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data(images, image_data) + feed_data(labels, label_data) forward_network.infer_shape(scope) forward_network.run(scope, dev_ctx) - set_cost() + set_cost(cost) backward_net.infer_shape(scope) backward_net.run(scope, dev_ctx) optimize_net.run(scope, dev_ctx) if batch_id % 100 == 0: print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]") - test() - # print(mean_cost()) - # print(error_rate(predict, "label")) + test(cost) batch_id = batch_id + 1 From 9db4ad6130d79d72fa150e534b5b54fa723c3240 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 09:42:58 -0700 Subject: [PATCH 079/170] reduce pass num to 1 --- python/paddle/v2/framework/tests/mnist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 886e99610d..eefd5709a3 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -222,7 +222,7 @@ def test(cost_name): sum(error) / float(len(error)))) -PASS_NUM = 1000 +PASS_NUM = 1 for pass_id in range(PASS_NUM): batch_id = 0 From 37cd8165b3089c8e4a6ce743f5e0ee8c029ba46b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 10:56:56 -0700 Subject: [PATCH 080/170] change 128 to BATCH_SIZE --- python/paddle/v2/framework/tests/mnist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index eefd5709a3..e878bfa4e9 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -205,7 +205,8 @@ train_reader = paddle.batch( def test(cost_name): - test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) cost = [] error = [] for data in test_reader(): From 72d29186bb426efc4eb78d9d6b6e605c7e2ce56c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 11:07:15 -0700 Subject: [PATCH 081/170] reduce some compile warning --- paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp | 4 ++-- paddle/operators/net_op_test.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 1829f72a87..d00d408ab8 100644 --- 
a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -1399,8 +1399,8 @@ void RecurrentGradientMachine::createDataOutlinkCopySizeInfo( getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size()); int* starts = inputSeqStartPos->getMutableData(false); int seqId = 0; - for (int i = 0; i < finalPaths_.size(); ++i) { - for (int j = 0; j < finalPaths_[i].size(); ++j) { + for (size_t i = 0; i < finalPaths_.size(); ++i) { + for (size_t j = 0; j < finalPaths_[i].size(); ++j) { copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i] : starts[j + 1] - starts[j]; batchMachineStartPos_[seqId + 1] = diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index 99019754a9..f2e98ee7a1 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -79,7 +79,7 @@ TEST(NetOp, Clone) { ASSERT_NE(new_net_op, nullptr); ASSERT_TRUE(new_net_op->IsNetOp()); auto* new_net = static_cast(new_net_op.get()); - ASSERT_EQ(2, new_net->ops_.size()); + ASSERT_EQ(2UL, new_net->ops_.size()); ASSERT_EQ(new_net->ops_[0]->Type(), "empty"); ASSERT_EQ(new_net->ops_[1]->Type(), "empty2"); } From 3b6069b7f511e41a00551cc3141385d9eb464f51 Mon Sep 17 00:00:00 2001 From: haonanyu Date: Wed, 23 Aug 2017 12:16:36 -0700 Subject: [PATCH 082/170] fix ids reverse order in fillGenOutputs --- paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 1829f72a87..56c3951cfd 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -1344,7 +1344,7 @@ void RecurrentGradientMachine::fillGenOutputs() { CHECK(!finalPaths_[i].empty()); Path& path = finalPaths_[i][0]; generator_.ids.insert( - generator_.ids.begin(), path.ids.begin(), path.ids.end()); + generator_.ids.end(), path.ids.begin(), path.ids.end()); starts[i + 1] = starts[i] + path.ids.size(); } } From da7a1f2f6c355b1bcdc0bd88e644f027d70f75d8 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 23 Aug 2017 21:30:08 +0000 Subject: [PATCH 083/170] master client: retry connecting to etcd --- go/master/client.go | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index 62801b9b7f..9344c6f0ab 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -60,13 +60,30 @@ func WithAddr(addr string) func(c *Client) error { } } +func retry(f func() error, dur time.Duration, count int) error { + err := f() + if err != nil { + if count > 0 { + return retry(f, dur, count-1) + } + return err + } + return nil +} + // WithEtcd sets the client to use etcd for master discovery. 
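// The retry helper added above re-dials etcd up to `count` times before giving up; WithEtcd uses it with 10 attempts, so NewClient no longer needs the fixed one-second sleep it previously used to wait for the connection to become ready.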
func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error { return func(c *Client) error { - cli, err := clientv3.New(clientv3.Config{ - Endpoints: endpoints, - DialTimeout: timeout, - }) + var cli *clientv3.Client + f := func() error { + var err error + cli, err = clientv3.New(clientv3.Config{ + Endpoints: endpoints, + DialTimeout: timeout, + }) + return err + } + err := retry(f, time.Second, 10) if err != nil { return err } @@ -101,9 +118,6 @@ func NewClient(opts ...func(*Client) error) (*Client, error) { } } c.ch = make(chan record, c.bufSize) - // FIXME: connection is created asyncrosly in monitorMaster go routine, - // ensure the connection is ready for use before calling c.addClient. - time.Sleep(time.Second) return c, nil } From 5270585e107b16dc527ada329dddf6fc44714a35 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 23 Aug 2017 21:38:43 +0000 Subject: [PATCH 084/170] fix according to comment --- go/master/client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/master/client.go b/go/master/client.go index 9344c6f0ab..199690d488 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -64,6 +64,7 @@ func retry(f func() error, dur time.Duration, count int) error { err := f() if err != nil { if count > 0 { + time.Sleep(dur) return retry(f, dur, count-1) } return err From 05176bd1bb5af94bfbabbb524ed9e65448134e39 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 24 Aug 2017 01:23:27 +0000 Subject: [PATCH 085/170] master server will wait etcd forever --- go/master/client.go | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index 199690d488..f04cf50ce3 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -60,18 +60,6 @@ func WithAddr(addr string) func(c *Client) error { } } -func retry(f func() error, dur time.Duration, count int) error { - err := f() - if err != nil { - if count > 0 { - time.Sleep(dur) - return retry(f, dur, count-1) - } - return err - } - return nil -} - // WithEtcd sets the client to use etcd for master discovery. func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error { return func(c *Client) error { @@ -84,9 +72,14 @@ func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error { }) return err } - err := retry(f, time.Second, 10) - if err != nil { - return err + for { + err := f() + if err != nil { + log.Warningln(err) + } else { + break + } + time.Sleep(time.Second) } ch := make(chan string, 1) From 161a15f055c2cbe1937522a7a11dbdeb31f1a774 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Thu, 24 Aug 2017 03:11:54 +0000 Subject: [PATCH 086/170] gradient check --- python/paddle/v2/framework/tests/gradient_checker.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index c22c6f8831..d7809e52fb 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -86,6 +86,9 @@ def get_numeric_gradient(op, # we only compute gradient of one element each time. # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) # get one input element throw it's index i. 
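# The loop added above restores every input tensor from the cached input_values, so the perturbation below always starts from the original, unmodified input values.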
origin = tensor_to_check.get_float_element(i) @@ -95,6 +98,9 @@ def get_numeric_gradient(op, y_pos = get_output() # plus delta to this element, run op and get the sum of the result tensor. + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() From 0e300f9bf04ba459dbef93af9537f847cebbcd27 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 20:14:54 -0700 Subject: [PATCH 087/170] use init_net and random_op to initialize parameter --- python/paddle/v2/framework/tests/mnist.py | 54 +++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index e878bfa4e9..0c27ce3e35 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -9,11 +9,8 @@ scope = core.Scope() place = core.CPUPlace() dev_ctx = core.DeviceContext.create(place) -# init_net = core.Net.create() -forward_network = core.Net.create() - -# should be init after forward_op is constructed -# backward_net = core.Operator.backward(forward_net, set()) +init_net = core.Net.create() +forward_net = core.Net.create() backward_net = None optimize_net = core.Net.create() @@ -64,13 +61,12 @@ def sgd_optimizer(net, param_name, learning_rate=0.005): # should use operator and add these to the init_network -def init_param(param_name, dims): - var = scope.new_var(param_name) - tensor = var.get_tensor() - tensor.set_dims(dims) - data = numpy.random.uniform( - low=-0.5, high=0.5, size=tensor.shape()).astype("float32") - tensor.set(data, place) +def init_param(net, param_name, dims): + scope.new_var(param_name) + op = Operator( + "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10) + op.infer_shape(scope) + net.append_op(op) # fc_layer @@ -96,7 +92,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): input_dims = scope.find_var(input).get_tensor().get_dims() w_name = param or name + ".w" - init_param(param_name=w_name, dims=[input_dims[1], size]) + init_param(net=init_net, param_name=w_name, dims=[input_dims[1], size]) sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01) pre_activation = name + ".mul.out" @@ -107,7 +103,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): # create bias variable if needed if bias: bias_name = name + ".b" - init_param(param_name=bias_name, dims=[size]) + init_param(net=init_net, param_name=bias_name, dims=[size]) sgd_optimizer( net=optimize_net, param_name=bias_name, learning_rate=0.001) bias_out = name + ".rowwise_add.out" @@ -181,20 +177,22 @@ def error_rate(predict, label): images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) labels = data_layer(name='label', dims=[BATCH_SIZE]) -fc1 = fc_layer(net=forward_network, input=images, size=100, act="sigmoid") -fc2 = fc_layer(net=forward_network, input=fc1, size=100, act="sigmoid") -predict = fc_layer(net=forward_network, input=fc2, size=100, act="softmax") -cost = cross_entropy_layer(net=forward_network, input=predict, label=labels) - -forward_network.complete_add_op(True) -backward_net = create_backward_net(forward_network) +fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid") +fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid") +predict = fc_layer(net=forward_net, input=fc2, 
size=100, act="softmax") +cost = cross_entropy_layer(net=forward_net, input=predict, label=labels) + +init_net.complete_add_op(True) +forward_net.complete_add_op(True) +backward_net = create_backward_net(forward_net) optimize_net.complete_add_op(True) -print(forward_network) +print(init_net) +print(forward_net) print(backward_net) print(optimize_net) -debug_print_op(forward_network) +debug_print_op(forward_net) debug_print_op(backward_net) debug_print_op(optimize_net) @@ -215,8 +213,8 @@ def test(cost_name): feed_data(images, image_data) feed_data(labels, label_data) - forward_network.infer_shape(scope) - forward_network.run(scope, dev_ctx) + forward_net.infer_shape(scope) + forward_net.run(scope, dev_ctx) cost.append(mean_cost(cost_name)) error.append(error_rate(predict, "label")) print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( @@ -224,6 +222,8 @@ def test(cost_name): PASS_NUM = 1 + +init_net.run(scope, dev_ctx) for pass_id in range(PASS_NUM): batch_id = 0 @@ -233,8 +233,8 @@ for pass_id in range(PASS_NUM): feed_data(images, image_data) feed_data(labels, label_data) - forward_network.infer_shape(scope) - forward_network.run(scope, dev_ctx) + forward_net.infer_shape(scope) + forward_net.run(scope, dev_ctx) set_cost(cost) backward_net.infer_shape(scope) backward_net.run(scope, dev_ctx) From 0ee18a86d18b4d4506c63e13b2953c9153c27f8d Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 24 Aug 2017 11:50:35 +0800 Subject: [PATCH 088/170] Fix doc. --- python/paddle/trainer_config_helpers/layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b3568cc257..f323b017c0 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2418,8 +2418,8 @@ def img_conv_layer(input, :param dilation: The x dimension of the dilation. Or input a tuple for two image dimension :type dilation: int|tuple|list - :param padding_y: The y dimension of the dilation. - :type padding_y: int + :param dilation_y: The y dimension of the dilation. + :type dilation_y: int :param bias_attr: Convolution bias attribute. None means default bias. False means no bias. 
:type bias_attr: ParameterAttribute|False From 12864f142073b4a280120e4d9b3abe4e2483ca32 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 22:51:35 -0700 Subject: [PATCH 089/170] register rowwise add gpu kernel --- paddle/operators/rowwise_add_op.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index cbc61ad3e1..4a57f64c89 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -18,3 +18,6 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( rowwise_add, ops::RowwiseAddKernel); +REGISTER_OP_GPU_KERNEL( + rowwise_add_grad, + ops::RowwiseAddGradKernel); From 3648165b63bd5331d1809cba896176e4af0a9ff2 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 23:00:45 -0700 Subject: [PATCH 090/170] add gpu support --- python/paddle/v2/framework/tests/mnist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 0c27ce3e35..d9941023fe 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -7,6 +7,8 @@ BATCH_SIZE = 100 scope = core.Scope() place = core.CPUPlace() +# if you want to test GPU training, you can use gpu place +# place = core.GPUPlace(0) dev_ctx = core.DeviceContext.create(place) init_net = core.Net.create() From 625b15355a16fa42476e7dbd166b77e092dcb97f Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 23 Aug 2017 23:56:55 -0700 Subject: [PATCH 091/170] optimize code --- python/paddle/v2/framework/tests/mnist.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index d9941023fe..9a0b109850 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -17,14 +17,14 @@ backward_net = None optimize_net = core.Net.create() -def atom_id(): +def atomic_id(): id = 0 while True: yield id id += 1 -uniq_id = atom_id().next +uniq_id = atomic_id().next def data_layer(name, dims): @@ -164,7 +164,7 @@ def set_cost(cost): cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) -def mean_cost(cost): +def get_cost_mean(cost): cost_data = numpy.array(scope.find_var(cost).get_tensor()) return cost_data.sum() / len(cost_data) @@ -217,7 +217,7 @@ def test(cost_name): forward_net.infer_shape(scope) forward_net.run(scope, dev_ctx) - cost.append(mean_cost(cost_name)) + cost.append(get_cost_mean(cost_name)) error.append(error_rate(predict, "label")) print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( sum(error) / float(len(error)))) From 790379f1477835badbc35c563623d13ec5fd2b7a Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 23 Aug 2017 14:11:30 +0800 Subject: [PATCH 092/170] fix above comments --- paddle/cuda/include/hl_cnn.h | 106 ------------------ paddle/cuda/include/stub/hl_cnn_stub.h | 6 +- paddle/gserver/layers/Pool3DLayer.cpp | 71 +++++------- paddle/gserver/layers/Pool3DLayer.h | 1 + paddle/math/Matrix.cpp | 131 +++++++++++----------- paddle/math/Matrix.h | 135 +++++++++++------------ paddle/math/tests/test_matrixCompare.cpp | 97 ++++++++-------- 7 files changed, 208 insertions(+), 339 deletions(-) diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index e9687d0a58..84f1c84359 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -173,31 +173,6 @@ extern void 
hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride); -/** - * @brief Maximum pool forward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] tgtData output data. - * @param[in] tgtStride stride between output data samples. - * - */ extern void hl_maxpool3D_forward(const int frameCnt, const real* inputData, const int channels, @@ -219,35 +194,6 @@ extern void hl_maxpool3D_forward(const int frameCnt, real* tgtData, const int tgtStride); -/** - * @brief Maximum pool backward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[out] outData output data. - * @param[out] outGrad output grad data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] scaleA scale. - * @param[in] scaleB scale. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] targetGrad output grad. - * @param[in] outStride stride between output data samples. - * - */ extern void hl_maxpool3D_backward(const int frameCnt, const real* inputData, const real* outData, @@ -273,31 +219,6 @@ extern void hl_maxpool3D_backward(const int frameCnt, real* targetGrad, const int outStride); -/** - * @brief Averge pool forward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] tgtData output data. - * @param[in] tgtStride stride between output data samples. 
- * - */ extern void hl_avgpool3D_forward(const int frameCnt, const real* inputData, const int channels, @@ -319,33 +240,6 @@ extern void hl_avgpool3D_forward(const int frameCnt, real* tgtData, const int tgtStride); -/** - * @brief Maximum pool backward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] outGrad output grad data. - * @param[in] channels number of channel. - * @param[in] depth image depth. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledD output image depth. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeZ depth of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] sizeX width of pooling window. - * @param[in] strideD pooling stride depth. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingD padding depth. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[in] scaleA scale. - * @param[in] scaleB scale. - * @param[out] backGrad output grad. - * @param[in] outStride stride between output data samples. - * - */ extern void hl_avgpool3D_backward(const int frameCnt, const real* outGrad, const int channels, diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 28f61781be..6750f537bf 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -169,9 +169,9 @@ inline void hl_avgpool3D_backward(const int frameCnt, const int strideD, const int strideH, const int strideW, - int paddingD, - int paddingH, - int paddingW, + const int paddingD, + const int paddingH, + const int paddingW, real scaleA, real scaleB, real* backGrad, diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp index fc6b9bdd2f..40a913ebfc 100644 --- a/paddle/gserver/layers/Pool3DLayer.cpp +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -58,30 +58,9 @@ size_t Pool3DLayer::getSize() { CHECK_EQ(inputLayers_.size(), 1UL); size_t layerSize = 0; - // imgSizeD_ = inputLayers_[0]->getOutput().getFrameDepth(); - // imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); - // imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imgSizeH_ == 0) { - // imgSizeH_ = imgSizeY_; - } - if (imgSizeW_ == 0) { - // imgSizeW_ = imgSize_; - } - outputD_ = outputSize(imgSizeD_, - sizeZ_, - paddingD_, - strideD_, - /* caffeMode */ false); - outputH_ = outputSize(imgSizeH_, - sizeY_, - paddingH_, - strideH_, - /* caffeMode */ false); - outputW_ = outputSize(imgSizeW_, - sizeX_, - paddingW_, - strideW_, - /* caffeMode */ false); + outputD_ = outputSize(imgSizeD_, sizeZ_, paddingD_, strideD_, false); + outputH_ = outputSize(imgSizeH_, sizeY_, paddingH_, strideH_, false); + outputW_ = outputSize(imgSizeW_, sizeX_, paddingW_, strideW_, false); layerSize = outputD_ * outputH_ * outputW_ * channels_; getOutput().setFrameHeight(outputH_); @@ -100,37 +79,37 @@ void Pool3DLayer::forward(PassType passType) { if (poolType_ == "avg") { outMat->avgPool3DForward(*inMat, + channels_, imgSizeD_, imgSizeH_, imgSizeW_, - channels_, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeX_, strideD_, strideH_, strideW_, - outputD_, - outputH_, - outputW_, paddingD_, paddingH_, paddingW_); } else if (poolType_ == "max") { outMat->maxPool3DForward(*inMat, + channels_, imgSizeD_, imgSizeH_, imgSizeW_, - channels_, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeX_, strideD_, 
strideH_, strideW_, - outputD_, - outputH_, - outputW_, paddingD_, paddingH_, paddingW_); @@ -155,41 +134,41 @@ void Pool3DLayer::backward(const UpdateCallback& callback) { imgSizeD_, imgSizeH_, imgSizeW_, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeZ_, strideD_, strideH_, strideW_, - outputD_, - outputH_, - outputW_, - 1, - 1, paddingD_, paddingH_, - paddingW_); + paddingW_, + 1.0, + 1.0); } else if (poolType_ == "max") { inGradMat->maxPool3DBackward(*inMat, + *outGradMat, + *outMat, imgSizeD_, imgSizeH_, imgSizeW_, - *outGradMat, - *outMat, + outputD_, + outputH_, + outputW_, sizeZ_, sizeY_, sizeZ_, strideD_, strideH_, strideW_, - outputD_, - outputH_, - outputW_, - 1, - 1, paddingD_, paddingH_, - paddingW_); + paddingW_, + 1.0, + 1.0); } else { LOG(FATAL) << "Unknown pool type: " << poolType_; } diff --git a/paddle/gserver/layers/Pool3DLayer.h b/paddle/gserver/layers/Pool3DLayer.h index afc65ac2b0..8329a02f57 100644 --- a/paddle/gserver/layers/Pool3DLayer.h +++ b/paddle/gserver/layers/Pool3DLayer.h @@ -44,5 +44,6 @@ protected: int imgSizeW_, imgSizeH_, imgSizeD_; int outputW_, outputH_, outputD_; std::string poolType_; + MatrixPtr maxPoolIdx_; }; } // namespace paddle diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index e7f1489b8b..4f9216896c 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1191,23 +1191,23 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, } void GpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { - CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + CHECK(inputMat.useGpu_) << "Matrix type are not correct"; real* inputData = inputMat.getData(); size_t num = inputMat.getHeight(); @@ -1236,32 +1236,31 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, paddingD, paddingH, paddingW, - data_, + getData(), getStride()); } void GpuMatrix::maxPool3DBackward(Matrix& inputMat, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { - CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true && - outV.useGpu_ == true) + size_t paddingW, + real scaleTargets, + real scaleOutput) { + CHECK(inputMat.useGpu_ && outGrad.useGpu_ && outV.useGpu_) << "Matrix type are not equal"; real* inputData = inputMat.getData(); @@ -1300,28 +1299,28 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, paddingW, scaleTargets, scaleOutput, - data_, + getData(), outGrad.getStride()); } void GpuMatrix::avgPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { - CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; + CHECK(inputMat.useGpu_) 
<< "Matrix type are not equal"; real* inputData = inputMat.getData(); size_t frameNum = inputMat.getHeight(); @@ -1350,7 +1349,7 @@ void GpuMatrix::avgPool3DForward(Matrix& inputMat, paddingD, paddingH, paddingW, - data_, + getData(), getStride()); } @@ -1358,21 +1357,21 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { - CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; + size_t paddingW, + real scaleTargets, + real scaleOutput) { + CHECK(outGrad.useGpu_) << "Matrix type are not equal"; real* outDiff = outGrad.getData(); size_t frameNum = outGrad.getHeight(); @@ -1404,7 +1403,7 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad, paddingW, scaleTargets, scaleOutput, - data_, + getData(), outGrad.getStride()); } @@ -2149,24 +2148,24 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } void CpuMatrix::maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { real* inputData = inputMat.getData(); - real* outData = data_; + real* outData = getData(); size_t num = inputMat.getHeight(); size_t inWidth = imgSizeW; size_t inHeight = imgSizeH; @@ -2186,7 +2185,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, /* pool max one by one */ for (size_t n = 0; n < num; ++n) { // frame by frame if (!isContiguous()) { - outData = data_ + n * outStride; + outData = getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t pd = 0; pd < outputD; ++pd) { @@ -2201,15 +2200,18 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, dstart = std::max(dstart, 0); hstart = std::max(hstart, 0); wstart = std::max(wstart, 0); + real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - outData[(pd * outputH + ph) * outputW + pw] = - std::max(outData[(pd * outputH + ph) * outputW + pw], - inputData[(d * inHeight + h) * inWidth + w]); + if (maxOutData < + inputData[(d * inHeight + h) * inWidth + w]) { + maxOutData = inputData[(d * inHeight + h) * inWidth + w]; + } } } } + outData[(pd * outputH + ph) * outputW + pw] = maxOutData; } } } @@ -2221,25 +2223,25 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, } void CpuMatrix::maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { size_t num = image.getHeight(); size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); CHECK(image.getWidth() == imgSizeD * imgSizeH * imgSizeW * 
channels); @@ -2247,19 +2249,18 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, CHECK(outV.getHeight() == outGrad.getHeight() && outV.getWidth() == outGrad.getWidth()); - real* tgtGrad = data_; + real* tgtGrad = getData(); real* inData = image.getData(); real* otData = outV.getData(); real* otGrad = outGrad.getData(); size_t outStride = outV.getStride(); - real* origOutData = otData; - real* origOutGrad = otGrad; + ; for (size_t n = 0; n < num; ++n) { if (!outV.isContiguous()) { - otData = origOutData + n * outStride; - otGrad = origOutGrad + n * outStride; + otData = outV.getData() + n * outStride; + otGrad = outGrad.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { for (size_t pd = 0; pd < outputD; ++pd) { @@ -2274,7 +2275,7 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, dstart = std::max(dstart, 0); hstart = std::max(hstart, 0); wstart = std::max(wstart, 0); - for (int d = 0; d < dend; ++d) { + for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { tgtGrad[(d * imgSizeH + h) * imgSizeW + w] = @@ -2299,19 +2300,19 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, } void CpuMatrix::avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { @@ -2322,7 +2323,7 @@ void CpuMatrix::avgPool3DForward(Matrix& input, size_t inWidth = imgSizeW; CHECK(inDepth * inHeight * inWidth * channels == input.getWidth()); CHECK(outputD * outputH * outputW * channels * num == height_ * width_); - real* tgtData = data_; + real* tgtData = getData(); real* inData = input.getData(); for (size_t n = 0; n < num; ++n) { @@ -2372,20 +2373,20 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { size_t num = input.getHeight(); size_t channels = input.getWidth() / outputD / outputH / outputW; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index f1534c5ea0..dec9702433 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -933,19 +933,19 @@ public: * in the sizeX of value */ virtual void maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { @@ -953,42 +953,42 @@ public: } virtual void maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - 
real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { LOG(FATAL) << "Not implemeted"; } virtual void avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW) { @@ -999,20 +999,20 @@ public: size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW) { + size_t paddingW, + real scaleTargets, + real scaleOutput) { LOG(FATAL) << "Not implemeted"; } @@ -1435,60 +1435,59 @@ public: size_t paddingH, size_t paddingW); - ///////////////////////// void maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); void maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); @@ -1497,20 +1496,20 @@ public: size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void maxSequenceForward(Matrix& input, const IVector& sequence, @@ -1670,60 +1669,60 @@ public: real scaleOutput, size_t paddingH, size_t paddingW); - ////////////////////// + void maxPool3DForward(Matrix& inputMat, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); void maxPool3DBackward(Matrix& image, + Matrix& outGrad, + Matrix& outV, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, - Matrix& 
outV, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void avgPool3DForward(Matrix& input, + size_t channels, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, - size_t channels, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, size_t paddingD, size_t paddingH, size_t paddingW); @@ -1732,20 +1731,20 @@ public: size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, size_t sizeZ, size_t sizeY, size_t sizeX, size_t strideD, size_t strideH, size_t strideW, - size_t outputD, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, size_t paddingD, size_t paddingH, - size_t paddingW); + size_t paddingW, + real scaleTargets, + real scaleOutput); void maxSequenceForward(Matrix& input, const IVector& sequence, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 7a961d2751..21ee8543cd 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1204,7 +1204,6 @@ TEST(Matrix, warpCTC) { } } -///// void testMatrixPool3D(int depth, int height, int width) { int channel = 3; int filterX = 3, filterY = 4, filterZ = 5; @@ -1226,38 +1225,37 @@ void testMatrixPool3D(int depth, int height, int width) { cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); - // std::cout << "test maxPool3DForward...\n"; cpuOutput->maxPool3DForward(*cpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); gpuOutput->maxPool3DForward(*gpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); @@ -1265,39 +1263,38 @@ void testMatrixPool3D(int depth, int height, int width) { cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); - // std::cout << "test avgPool3DForward...\n"; cpuOutput->avgPool3DForward(*cpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); gpuOutput->avgPool3DForward(*gpuImage, + channel, depth, height, width, - channel, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, padZ, padY, padX); @@ -1306,98 +1303,96 @@ void testMatrixPool3D(int depth, int height, int width) { gpuImage->copyFrom(*cpuImage); cpuOutput->randomizeUniform(); gpuOutput->copyFrom(*cpuOutput); - // std::cout << "test avgPool3DBackward...\n"; cpuImage->avgPool3DBackward(*cpuOutput, depth, height, width, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); gpuImage->avgPool3DBackward(*gpuOutput, depth, height, width, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); TensorCheckErr(*cpuImage, *gpuImage); 
cpuImage->randomizeUniform(); gpuImage->copyFrom(*cpuImage); cpuOutput->randomizeUniform(); gpuOutput->copyFrom(*cpuOutput); - // std::cout << "test maxPool3DBackward...\n"; cpuImage->maxPool3DBackward(*cpuImage, + *cpuOutput, + *cpuOutput, depth, height, width, - *cpuOutput, - *cpuOutput, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); gpuImage->maxPool3DBackward(*gpuImage, + *gpuOutput, + *gpuOutput, depth, height, width, - *gpuOutput, - *gpuOutput, + outD, + outH, + outW, filterZ, filterY, filterX, strideZ, strideY, strideX, - outD, - outH, - outW, - 1, - 1, padZ, padY, - padX); + padX, + 1.0, + 1.0); TensorCheckErr(*cpuImage, *gpuImage); } TEST(Matrix, Pool3D) { - for (auto depth : {9, 16, 64, 128}) { - for (auto height : {9, 11, 128, 256}) { + for (auto depth : {9, 16, 64}) { + for (auto height : {9, 11, 128}) { for (auto width : {9, 32, 128}) { VLOG(3) << "depth=" << depth << " height=" << height << " width=" << width; From 860bf192c904627ee0446051fe97911eb11895ad Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 24 Aug 2017 19:28:56 +0800 Subject: [PATCH 093/170] Add maxPoolIdx --- paddle/cuda/include/hl_cnn.h | 4 +- paddle/cuda/include/stub/hl_cnn_stub.h | 4 +- paddle/cuda/src/hl_cuda_cnn.cu | 73 +-- paddle/gserver/layers/Pool3DLayer.cpp | 11 +- paddle/math/Matrix.cpp | 86 ++-- paddle/math/Matrix.h | 18 +- paddle/math/tests/test_matrixCompare.cpp | 564 +++++++++++++++-------- 7 files changed, 473 insertions(+), 287 deletions(-) diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 84f1c84359..6b56d9ec8d 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -192,11 +192,10 @@ extern void hl_maxpool3D_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, + real* maxPoolIdxData, const int tgtStride); extern void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -217,6 +216,7 @@ extern void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride); extern void hl_avgpool3D_forward(const int frameCnt, diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 6750f537bf..a76dbf0b65 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -106,11 +106,10 @@ inline void hl_maxpool3D_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, + real* maxPoolIdxData, const int tgtStride) {} inline void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -131,6 +130,7 @@ inline void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) {} inline void hl_avgpool3D_forward(const int frameCnt, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 458c347728..95440c9446 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -366,10 +366,11 @@ __global__ void KeMaxPool3DForward(const int nthreads, const int strideD, const int strideH, const int strideW, - const int offsetD, - const int offsetH, - const int offsetW, + const int padD, + const int padH, + const int padW, real* tgtData, + real* 
maxPoolIdxData, const int tgtStride) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); index += blockDim.x * gridDim.x) { @@ -378,9 +379,9 @@ __global__ void KeMaxPool3DForward(const int nthreads, int pd = (index / pooledW / pooledH) % pooledD; int c = (index / pooledW / pooledH / pooledD) % channels; int frameNum = index / pooledW / pooledH / pooledD / channels; - int dstart = pd * strideD - offsetD; - int hstart = ph * strideH - offsetH; - int wstart = pw * strideW - offsetW; + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; int dend = min(dstart + ksizeD, depth); int hend = min(hstart + ksizeH, height); int wend = min(wstart + ksizeW, width); @@ -388,18 +389,22 @@ __global__ void KeMaxPool3DForward(const int nthreads, hstart = max(hstart, 0); wstart = max(wstart, 0); real maxval = -FLT_MAX; + int maxIdx = -1; inputData += (frameNum * channels + c) * depth * height * width; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[(d * height + h) * width + w]) + if (maxval < inputData[(d * height + h) * width + w]) { maxval = inputData[(d * height + h) * width + w]; + maxIdx = (d * height + h) * width + w; + } } } } int tgtIndex = index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; tgtData[tgtIndex] = maxval; + maxPoolIdxData[tgtIndex] = maxIdx; } } @@ -418,10 +423,11 @@ void hl_maxpool3D_forward(const int frameCnt, const int strideD, const int strideH, const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, + const int padD, + const int padH, + const int padW, real* tgtData, + real* maxPoolIdxData, const int tgtStride) { int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; @@ -443,17 +449,16 @@ void hl_maxpool3D_forward(const int frameCnt, strideD, strideH, strideW, - paddingD, - paddingH, - paddingW, + padD, + padH, + padW, tgtData, + maxPoolIdxData, tgtStride); CHECK_SYNC("hl_maxpool3D_forward failed"); } __global__ void KeMaxPool3DBackward(const int nthreads, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -474,33 +479,35 @@ __global__ void KeMaxPool3DBackward(const int nthreads, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); index += blockDim.x * gridDim.x) { - // find out the local index - // find out the local offset - int offsetW = index % width + padW; - int offsetH = (index / width) % height + padH; - int offsetD = (index / width / height) % depth + padD; + int offsetW = index % width; + int offsetH = (index / width) % height; + int offsetD = (index / width / height) % depth; int offsetC = (index / width / height / depth) % channels; int frameNum = index / width / height / depth / channels; - int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; - int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; - int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; - int pdend = min(offsetD / strideD + 1, pooledD); - int phend = min(offsetH / strideH + 1, pooledH); - int pwend = min(offsetW / strideW + 1, pooledW); + int pdstart = + (offsetD + padD < sizeZ) ? 0 : (offsetD + padD - sizeZ) / strideD + 1; + int phstart = + (offsetH + padH < sizeY) ? 
0 : (offsetH + padH - sizeY) / strideH + 1; + int pwstart = + (offsetW + padW < sizeX) ? 0 : (offsetW + padW - sizeX) / strideW + 1; + int pdend = min((offsetD + padD) / strideD + 1, pooledD); + int phend = min((offsetH + padH) / strideH + 1, pooledH); + int pwend = min((offsetW + padW) / strideW + 1, pooledW); real gradient = 0; - real input = inputData[index]; - - outData += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); outGrad += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + maxPoolIdxData += + ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); for (int pd = pdstart; pd < pdend; ++pd) { for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { - if (input == outData[(pd * pooledH + ph) * pooledW + pw]) + if (((offsetD * height + offsetH) * width + offsetW) == + maxPoolIdxData[(pd * pooledH + ph) * pooledW + pw]) gradient += outGrad[(pd * pooledH + ph) * pooledW + pw]; } } @@ -510,8 +517,6 @@ __global__ void KeMaxPool3DBackward(const int nthreads, } void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -532,13 +537,12 @@ void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) { int num_kernels = depth * height * width * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; KeMaxPool3DBackward<<>>(num_kernels, - inputData, - outData, outGrad, channels, depth, @@ -559,6 +563,7 @@ void hl_maxpool3D_backward(const int frameCnt, scaleA, scaleB, targetGrad, + maxPoolIdxData, outStride); CHECK_SYNC("hl_maxpool3D_backward"); } diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp index 40a913ebfc..199f21adb1 100644 --- a/paddle/gserver/layers/Pool3DLayer.cpp +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -72,9 +72,10 @@ size_t Pool3DLayer::getSize() { void Pool3DLayer::forward(PassType passType) { Layer::forward(passType); const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); - int batchSize = inMat->getHeight(); - int outWidth = getSize(); + size_t batchSize = inMat->getHeight(); + size_t outWidth = getSize(); resetOutput(batchSize, outWidth); + Matrix::resizeOrCreate(maxPoolIdx_, batchSize, outWidth, false, useGpu_); const MatrixPtr outMat = getOutputValue(); if (poolType_ == "avg") { @@ -97,6 +98,7 @@ void Pool3DLayer::forward(PassType passType) { paddingW_); } else if (poolType_ == "max") { outMat->maxPool3DForward(*inMat, + *maxPoolIdx_, channels_, imgSizeD_, imgSizeH_, @@ -149,9 +151,8 @@ void Pool3DLayer::backward(const UpdateCallback& callback) { 1.0, 1.0); } else if (poolType_ == "max") { - inGradMat->maxPool3DBackward(*inMat, - *outGradMat, - *outMat, + inGradMat->maxPool3DBackward(*outGradMat, + *maxPoolIdx_, imgSizeD_, imgSizeH_, imgSizeW_, diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 4f9216896c..54c2eae475 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1191,6 +1191,7 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, } void GpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -1210,6 +1211,7 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, CHECK(inputMat.useGpu_) << "Matrix type are not correct"; real* inputData = inputMat.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); size_t num = inputMat.getHeight(); size_t width = imgSizeW; size_t height 
= imgSizeH; @@ -1237,12 +1239,12 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, paddingH, paddingW, getData(), + maxPoolIdxData, getStride()); } -void GpuMatrix::maxPool3DBackward(Matrix& inputMat, - Matrix& outGrad, - Matrix& outV, +void GpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1260,26 +1262,21 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, size_t paddingW, real scaleTargets, real scaleOutput) { - CHECK(inputMat.useGpu_ && outGrad.useGpu_ && outV.useGpu_) - << "Matrix type are not equal"; + CHECK(outGrad.useGpu_ && maxPoolIdx.useGpu_) << "Matrix type are not equal"; - real* inputData = inputMat.getData(); - real* outData = outV.getData(); real* outDiff = outGrad.getData(); - size_t frameNum = inputMat.getHeight(); - size_t channels = outV.getWidth() / outputD / outputH / outputW; + real* maxPoolIdxData = maxPoolIdx.getData(); + size_t frameNum = getHeight(); + size_t channels = outGrad.getWidth() / outputD / outputH / outputW; size_t width = imgSizeW; size_t height = imgSizeH; size_t depth = imgSizeD; - CHECK(depth * height * width * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); + CHECK(depth * height * width * channels == getWidth()); CHECK(width_ == depth * width * height * channels); - CHECK(outGrad.getHeight() == outV.getHeight() && - outGrad.getWidth() == outV.getWidth()); + CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() && + outGrad.getWidth() == maxPoolIdx.getWidth()); hl_maxpool3D_backward(frameNum, - inputData, - outData, outDiff, channels, depth, @@ -1300,6 +1297,7 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, scaleTargets, scaleOutput, getData(), + maxPoolIdxData, outGrad.getStride()); } @@ -2148,6 +2146,7 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } void CpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -2166,6 +2165,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, size_t paddingW) { real* inputData = inputMat.getData(); real* outData = getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); size_t num = inputMat.getHeight(); size_t inWidth = imgSizeW; size_t inHeight = imgSizeH; @@ -2179,6 +2179,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, for (size_t i = 0; i < height_; i++) { for (size_t j = 0; j < width_; j++) { outData[(i)*outStride + j] = -(real)FLT_MAX; + maxPoolIdxData[(i)*outStride + j] = -1; } } @@ -2186,6 +2187,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, for (size_t n = 0; n < num; ++n) { // frame by frame if (!isContiguous()) { outData = getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t pd = 0; pd < outputD; ++pd) { @@ -2200,6 +2202,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, dstart = std::max(dstart, 0); hstart = std::max(hstart, 0); wstart = std::max(wstart, 0); + int maxIdx = -1; real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { @@ -2207,24 +2210,26 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, if (maxOutData < inputData[(d * inHeight + h) * inWidth + w]) { maxOutData = inputData[(d * inHeight + h) * inWidth + w]; + maxIdx = (d * inHeight + h) * inWidth + w; } } } } outData[(pd * outputH + ph) * outputW + pw] = maxOutData; + maxPoolIdxData[(pd * outputH + ph) * outputW + pw] = maxIdx; } } } // 
compute offset inputData += inDepth * inHeight * inWidth; outData += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; } } } -void CpuMatrix::maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, +void CpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -2242,59 +2247,38 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, size_t paddingW, real scaleTargets, real scaleOutput) { - size_t num = image.getHeight(); + size_t num = getHeight(); size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); - CHECK(image.getWidth() == imgSizeD * imgSizeH * imgSizeW * channels); - CHECK(image.getHeight() == height_ && image.getWidth() == width_); - CHECK(outV.getHeight() == outGrad.getHeight() && - outV.getWidth() == outGrad.getWidth()); + CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() && + maxPoolIdx.getWidth() == outGrad.getWidth()); real* tgtGrad = getData(); - real* inData = image.getData(); - real* otData = outV.getData(); real* otGrad = outGrad.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); - size_t outStride = outV.getStride(); + size_t outStride = outGrad.getStride(); ; for (size_t n = 0; n < num; ++n) { - if (!outV.isContiguous()) { - otData = outV.getData() + n * outStride; + if (!outGrad.isContiguous()) { otGrad = outGrad.getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { for (size_t pd = 0; pd < outputD; ++pd) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { - int dstart = pd * strideD - paddingD; - int hstart = ph * strideH - paddingH; - int wstart = pw * strideW - paddingW; - int dend = std::min(dstart + sizeZ, imgSizeD); - int hend = std::min(hstart + sizeY, imgSizeH); - int wend = std::min(wstart + sizeX, imgSizeW); - dstart = std::max(dstart, 0); - hstart = std::max(hstart, 0); - wstart = std::max(wstart, 0); - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - tgtGrad[(d * imgSizeH + h) * imgSizeW + w] = - scaleTargets * - tgtGrad[(d * imgSizeH + h) * imgSizeW + w] + - scaleOutput * otGrad[(pd * outputH + ph) * outputW + pw] * - (inData[(d * imgSizeH + h) * imgSizeW + w] == - otData[(pd * outputH + ph) * outputW + pw]); - } - } - } + const size_t index = (pd * outputH + ph) * outputW + pw; + const size_t tgtIdx = static_cast(maxPoolIdxData[index]); + tgtGrad[tgtIdx] = + scaleTargets * tgtGrad[tgtIdx] + scaleOutput * otGrad[index]; } } } // offset - inData += imgSizeD * imgSizeH * imgSizeW; tgtGrad += imgSizeD * imgSizeH * imgSizeW; - otData += outputD * outputH * outputW; otGrad += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; } } } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index dec9702433..e674c1e9ab 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -933,6 +933,7 @@ public: * in the sizeX of value */ virtual void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -952,9 +953,8 @@ public: LOG(FATAL) << "Not implemeted"; } - virtual void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + virtual void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1436,6 +1436,7 @@ public: size_t paddingW); void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, 
size_t imgSizeD, size_t imgSizeH, @@ -1453,9 +1454,8 @@ public: size_t paddingH, size_t paddingW); - void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1671,6 +1671,7 @@ public: size_t paddingW); void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -1688,9 +1689,8 @@ public: size_t paddingH, size_t paddingW); - void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 21ee8543cd..d7ad6f18ac 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1204,202 +1204,398 @@ TEST(Matrix, warpCTC) { } } -void testMatrixPool3D(int depth, int height, int width) { - int channel = 3; - int filterX = 3, filterY = 4, filterZ = 5; - int strideX = 2, strideY = 2, strideZ = 2; - int padX = 1, padY = 1, padZ = 1; - - MatrixPtr cpuImage = - std::make_shared(1, channel * depth * height * width); - MatrixPtr gpuImage = - std::make_shared(1, channel * depth * height * width); - - int outD = outputSize(depth, filterZ, padZ, strideZ, true); - int outH = outputSize(height, filterY, padZ, strideY, true); - int outW = outputSize(width, filterX, padZ, strideX, true); - - int colBufWidth = outD * outH * outW; - MatrixPtr cpuOutput = std::make_shared(1, channel * colBufWidth); - MatrixPtr gpuOutput = std::make_shared(1, channel * colBufWidth); - - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->maxPool3DForward(*cpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - gpuOutput->maxPool3DForward(*gpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - TensorCheckErr(*cpuOutput, *gpuOutput); +void testMaxPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = channels * imgSizeD * imgSizeH * imgSizeW; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->avgPool3DForward(*cpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - - gpuOutput->avgPool3DForward(*gpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - TensorCheckErr(*cpuOutput, *gpuOutput); - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - cpuImage->avgPool3DBackward(*cpuOutput, - depth, - height, - width, 
- outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - - gpuImage->avgPool3DBackward(*gpuOutput, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - TensorCheckErr(*cpuImage, *gpuImage); - - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - cpuImage->maxPool3DBackward(*cpuImage, - *cpuOutput, - *cpuOutput, - depth, - height, - width, + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + MatrixPtr maxIdx = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr maxIdxGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->maxPool3DForward(*input, + *maxIdx, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + targetGpu->maxPool3DForward(*inputGpu, + *maxIdxGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, outD, outH, outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - - gpuImage->maxPool3DBackward(*gpuImage, - *gpuOutput, - *gpuOutput, - depth, - height, - width, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); + targetCheck->copyFrom(*targetGpu); + checkMatrixEqual(target, targetCheck); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->maxPool3DBackward(*targetGrad, + *maxIdx, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + inputGpuGrad->maxPool3DBackward(*targetGpuGrad, + *maxIdxGpu, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + MatrixPtr targetBwdCheck = + CpuMatrix::create(numSamples, inWidth, false, false); + targetBwdCheck->copyFrom(*inputGpuGrad); + checkMatrixEqual(inputGrad, targetBwdCheck); +} + +void testAvgPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = imgSizeD * imgSizeH * imgSizeW * channels; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = 
GpuMatrix::create(numSamples, inWidth, false, true); + + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->avgPool3DForward(*input, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + targetGpu->avgPool3DForward(*inputGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, outD, outH, outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - TensorCheckErr(*cpuImage, *gpuImage); + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + TensorCheckErr(*target, *targetGpu); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->avgPool3DBackward(*targetGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + + inputGpuGrad->avgPool3DBackward(*targetGpuGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + TensorCheckErr(*inputGrad, *inputGpuGrad); } -TEST(Matrix, Pool3D) { - for (auto depth : {9, 16, 64}) { - for (auto height : {9, 11, 128}) { - for (auto width : {9, 32, 128}) { - VLOG(3) << "depth=" << depth << " height=" << height - << " width=" << width; - testMatrixPool3D(depth, height, width); +// TODO(yi): I noticed many such blindly combinatorial tests in this +// file. They are no help to locate defects at all. 
+TEST(Matrix, Pool3DFwdBwd) { + for (auto numSamples : {1, 3}) { + for (auto channels : {3}) { + for (auto imgSizeD : {9, 16}) { + for (auto imgSizeH : {9, 32}) { + for (auto imgSizeW : {9, 32}) { + for (auto sizeX : {3}) { + for (auto sizeY : {3}) { + for (auto sizeZ : {3}) { + for (auto sD : {2}) { + for (auto sH : {2}) { + for (auto sW : {2}) { + for (auto pD : {0, (sizeZ - 1) / 2}) { + for (auto pH : {0, (sizeY - 1) / 2}) { + for (auto pW : {0, (sizeX - 1) / 2}) { + VLOG(3) << " numSamples=" << numSamples + << " channels=" << channels + << " imgSizeD=" << imgSizeD + << " imgSizeH=" << imgSizeH + << " imgSizeW=" << imgSizeW + << " sizeX=" << sizeX + << " sizeY=" << sizeY + << " sizeZ=" << sizeZ << " strideD=" << sD + << " strideH=" << sH << " strideW=" << sW + << " padingD=" << pD << " padingH=" << pH + << " padingW=" << pW; + + testMaxPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + testAvgPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + } + } + } + } + } + } + } + } + } + } + } } } } + + // + // for (auto numSamples : {1, 3}) { + // for (auto channels : {1, 3}) { + // for (auto imgSizeD : {9,16}) { + // for (auto imgSizeH : {9, 32}) { + // for (auto imgSizeW : {9, 32}) { + // for (auto sizeX : {2, 3}) { + // for (auto sizeY : {2, 3}) { + // for (auto sizeZ : {2,3}){ + // for (auto sD : {1, 2}) { + // for (auto sH : {1, 2}) { + // for (auto sW : {1, 2}) { + // for (auto pD : {0, (sizeZ - 1) / 2}){ + // for (auto pH : {0, (sizeY - 1) / 2}) { + // for (auto pW : {0, (sizeX - 1) / 2}) { + // VLOG(3) << " numSamples=" << numSamples + // << " channels=" << channels + // << " imgSizeD=" << imgSizeD + // << " imgSizeH=" << imgSizeH + // << " imgSizeW=" << imgSizeW + // << " sizeX=" << sizeX + // << " sizeY=" << sizeY + // << " sizeZ=" << sizeZ + // << " strideD=" << sD + // << " strideH=" << sH + // << " strideW=" << sW + // << " padingD=" << pD + // << " padingH=" << pH + // << " padingW=" << pW; + // + // testMaxPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // testAvgPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } } #endif From 692259e071494d08f50cc3b1a4e34326249bc132 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 24 Aug 2017 23:42:32 +0800 Subject: [PATCH 094/170] Fix pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a772125df6..83fe9af768 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ - id: clang-format-with-version-check name: clang-format description: Format files with ClangFormat. - entry: ./.clang_format.hook -i + entry: bash ./.clang_format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ - repo: https://github.com/PaddlePaddle/pre-commit-golang From 0dffe68ca9973c5cf7d95029e369330ffcfe0187 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 24 Aug 2017 23:45:17 +0800 Subject: [PATCH 095/170] Add NeonDepthwiseConvFunction. 
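
This patch adds a NEON-accelerated CPU forward path for depthwise convolution: a Padding helper in Im2Col.h zero-pads the input up front, DepthwiseConvKernel<3, 1> computes four adjacent output elements per step using overlapping vextq_f32 loads, and the function is registered as NeonDepthwiseConv and checked against GemmConv-CPU in DepthwiseConvOpTest.

For orientation only (not part of this patch; the function name and the layout comments below are illustrative, and it assumes stride 1 with an input that has already been zero-padded), the scalar computation that the NEON kernel vectorizes is roughly:

    // Hypothetical scalar reference for a single sample.
    // input:  [inputChannels, height, width]   (already padded)
    // filter: [outputChannels, kernelH, kernelW]
    // output: [outputChannels, outputH, outputW]
    void referenceDepthwiseConv(const float* input,
                                const float* filter,
                                float* output,
                                int inputChannels,
                                int height,
                                int width,
                                int outputChannels,
                                int outputH,
                                int outputW,
                                int kernelH,
                                int kernelW) {
      int filterMultiplier = outputChannels / inputChannels;
      for (int c = 0; c < outputChannels; ++c) {
        // Each output channel reads one input channel and owns one filter.
        const float* in = input + (c / filterMultiplier) * height * width;
        const float* k = filter + c * kernelH * kernelW;
        for (int oh = 0; oh < outputH; ++oh) {
          for (int ow = 0; ow < outputW; ++ow) {
            float sum = 0.f;
            for (int fh = 0; fh < kernelH; ++fh) {
              for (int fw = 0; fw < kernelW; ++fw) {
                sum += in[(oh + fh) * width + (ow + fw)] * k[fh * kernelW + fw];
              }
            }
            output[(c * outputH + oh) * outputW + ow] = sum;
          }
        }
      }
    }

The NEON kernel produces the same result for 3x3 filters by loading two float32x4_t registers per input row and shifting them with vextq_f32, so each inner step yields four output elements at once.
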
--- paddle/function/CMakeLists.txt | 2 + paddle/function/DepthwiseConvOpTest.cpp | 5 + paddle/function/Im2Col.h | 92 +++++++++ paddle/function/neon/NeonDepthwiseConv.cpp | 227 +++++++++++++++++++++ paddle/function/neon/NeonDepthwiseConv.h | 25 +++ paddle/function/neon/neon_util.h | 47 +++++ 6 files changed, 398 insertions(+) create mode 100644 paddle/function/neon/NeonDepthwiseConv.cpp create mode 100644 paddle/function/neon/NeonDepthwiseConv.h create mode 100644 paddle/function/neon/neon_util.h diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index c572a9d433..05f808a6a1 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -21,6 +21,8 @@ if(USE_NNPACK) endif() endif() +list(APPEND cpp_files neon/NeonDepthwiseConv.cpp) + add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) add_dependencies(paddle_function ${external_project_dependencies}) add_dependencies(paddle_function paddle_proto) diff --git a/paddle/function/DepthwiseConvOpTest.cpp b/paddle/function/DepthwiseConvOpTest.cpp index f44ae0c342..bdace2c372 100644 --- a/paddle/function/DepthwiseConvOpTest.cpp +++ b/paddle/function/DepthwiseConvOpTest.cpp @@ -34,4 +34,9 @@ TEST(DepthwiseConv, BackwardFilter) { } #endif +TEST(DepthwiseConv, Forward) { + DepthwiseConvolution( + "GemmConv-CPU", "NeonDepthwiseConv-CPU", forward); +} + } // namespace paddle diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h index 48e2e32f92..9b91e223a6 100644 --- a/paddle/function/Im2Col.h +++ b/paddle/function/Im2Col.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "TensorShape.h" #include "TensorType.h" +#include "neon/neon_util.h" namespace paddle { @@ -93,4 +94,95 @@ public: int paddingWidth); }; +template +struct Padding { + static void run(const T* src, + T* dest, + int channels, + int inputHeight, + int inputWidth, + int paddingHeight, + int paddingWidth) { + const int destWidth = inputWidth + 2 * paddingWidth; + for (int c = 0; c < channels; c++) { + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(T)); + dest += destWidth * paddingHeight; + } + + for (int i = 0; i < inputHeight; i++) { + // padding head + for (int j = 0; j < paddingWidth; j++) { + *dest++ = T(0); + } + + memcpy(dest, src, inputWidth * sizeof(T)); + dest += inputWidth; + src += inputWidth; + + // padding tail + for (int j = 0; j < paddingWidth; j++) { + *dest++ = T(0); + } + } + + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(T)); + dest += destWidth * paddingHeight; + } + } + } +}; + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +template <> +struct Padding { + static void run(const float* src, + float* dest, + int channels, + int inputHeight, + int inputWidth, + int paddingHeight, + int paddingWidth) { + const int destWidth = inputWidth + 2 * paddingWidth; + for (int c = 0; c < channels; c++) { + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(float)); + dest += destWidth * paddingHeight; + } + + for (int i = 0; i < inputHeight; i++) { + // padding head + for (int j = 0; j < paddingWidth; j++) { + *dest++ = float(0); + } + + int step = inputWidth >> 2; + int remain = inputWidth & 3; + for (int s = 0; s < step; s++) { + float32x4_t s0 = vld1q_f32(src); + vst1q_f32(dest, s0); + src += 4; + dest += 4; + } + for (int r = 0; r < remain; r++) { + *dest++ = *src++; + } + + // padding tail + for (int j = 0; j < paddingWidth; j++) { + *dest++ = float(0); + } + } + + if (paddingHeight > 0) { + memset(dest, 0, 
destWidth * paddingHeight * sizeof(float)); + dest += destWidth * paddingHeight; + } + } + } +}; + +#endif + } // namespace paddle diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp new file mode 100644 index 0000000000..16d94c976e --- /dev/null +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -0,0 +1,227 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "neon_util.h" +#include "paddle/function/ConvOp.h" +#include "paddle/function/Im2Col.h" + +namespace paddle { + +namespace neon { + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + +template +struct DepthwiseConvKernel {}; + +inline float32_t conv3x3(float32x4_t r0, + float32x4_t r1, + float32x4_t r2, + float32x4_t k0, + float32x4_t k1, + float32x4_t k2) { + float32x4_t tmp; + tmp = vmulq_f32(r0, k0); + tmp = vmlaq_f32(tmp, r1, k1); + tmp = vmlaq_f32(tmp, r2, k2); + return vaddvq_f32(tmp); +} + +/** + * Each step calculates four elements of the output. + * First step: + * R0[0, 1, 2, 3...] * K[0][0] + * R0[1, 2, 3, 4...] * K[0][1] + * R0[2, 3, 4, 5...] * K[0][2] + * R1[0, 1, 2, 3...] * K[1][0] + * R1[1, 2, 3, 4...] * K[1][1] + * R1[2, 3, 4, 5...] * K[1][2] + * R2[0, 1, 2, 3...] * K[2][0] + * R2[1, 2, 3, 4...] * K[2][1] + * + R2[2, 3, 4, 5...] 
* K[2][2] + * ------------------------------ + * Output[0, 1, 2, 3] + */ +template <> +struct DepthwiseConvKernel<3, 1> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 9) { + // Load the filters + float32x4_t k[3]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 3); + k[2] = vld1q_f32(filterData + 6); + k[0] = vsetq_lane_f32(0.f, k[0], 3); + k[1] = vsetq_lane_f32(0.f, k[1], 3); + k[2] = vsetq_lane_f32(0.f, k[2], 3); + + const float* r0 = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + const float* r1 = r0 + inputWidth; + const float* r2 = r0 + inputWidth * 2; + float32x4_t input[3][3]; + for (int h = 0; h < outputHeight; h++) { + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t tmp; + input[0][0] = vld1q_f32(r0); + tmp = vld1q_f32(r0 + 4); + input[0][1] = vextq_f32(input[0][0], tmp, 1); + input[0][2] = vextq_f32(input[0][0], tmp, 2); + input[1][0] = vld1q_f32(r1); + tmp = vld1q_f32(r1 + 4); + input[1][1] = vextq_f32(input[1][0], tmp, 1); + input[1][2] = vextq_f32(input[1][0], tmp, 2); + input[2][0] = vld1q_f32(r2); + tmp = vld1q_f32(r2 + 4); + input[2][1] = vextq_f32(input[2][0], tmp, 1); + input[2][2] = vextq_f32(input[2][0], tmp, 2); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 4; + r1 += 4; + r2 += 4; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); + r0++; + r1++; + r2++; + outputData++; + } + + r0 += 2; + r1 += 2; + r2 += 2; + } + } + } +}; + +template +class NeonDepthwiseConvFunction : public ConvFunctionBase { +public: + void init(const FuncConfig& config) override { + ConvFunctionBase::init(config); + } + + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + checkShape(input, filter, output); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + check(inputs, outputs); + + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + + size_t batchSize = input[0]; + size_t inputChannels = input[1]; + size_t inputHeight = input[2]; + size_t inputWidth = input[3]; + size_t filterHeight = getFilterHeight(filter); + size_t filterWidth = getFilterWidth(filter); + size_t outputChannels = output[1]; + size_t outputHeight = 
output[2]; + size_t outputWidth = output[3]; + size_t filterMultiplier = outputChannels / groups_; + CHECK_EQ(inputChannels, groups_); + + // only support + CHECK_EQ(strideH(), strideW()); + CHECK_EQ(filterHeight, filterWidth); + CHECK_EQ(filterHeight, size_t(3)); + CHECK_LT(strideH(), size_t(3)); + + float* inputData = inputs[0].data(); + float* filterData = inputs[1].data(); + float* outputData = outputs[0].data(); + + // padding the input + float* inputPadding = inputData; + if (paddingH() > 0 || paddingW() > 0) { + int newSize = batchSize * inputChannels * (inputHeight + 2 * paddingH()) * + (inputWidth + 2 * paddingW()); + resizeBuffer(newSize); + inputPadding = reinterpret_cast(memory_->getBuf()); + Padding::run(inputData, + inputPadding, + batchSize * inputChannels, + inputHeight, + inputWidth, + paddingH(), + paddingW()); + + // height and width of padding data + inputHeight += 2 * paddingH(); + inputWidth += 2 * paddingW(); + } + + for (size_t i = 0; i < batchSize; i++) { + DepthwiseConvKernel<3, 1>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); + + inputPadding += inputChannels * inputHeight * inputWidth; + outputData += outputChannels * outputHeight * outputWidth; + } + } +}; + +REGISTER_TYPED_FUNC(NeonDepthwiseConv, CPU, NeonDepthwiseConvFunction); + +#endif + +} // namespace neon +} // namespace paddle diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/function/neon/NeonDepthwiseConv.h new file mode 100644 index 0000000000..23e4be1921 --- /dev/null +++ b/paddle/function/neon/NeonDepthwiseConv.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +namespace paddle { + +namespace neon { + +template +struct DepthwiseConvKernel {}; + +} // namespace neon +} // namespace paddle diff --git a/paddle/function/neon/neon_util.h b/paddle/function/neon/neon_util.h new file mode 100644 index 0000000000..56b3febe2d --- /dev/null +++ b/paddle/function/neon/neon_util.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + +#include + +namespace paddle { + +namespace neon { + +inline float32x4_t vld1q_f32_aligned(const float* p) { + return vld1q_f32( + (const float*)__builtin_assume_aligned(p, sizeof(float32x4_t))); +} + +#ifndef __aarch64__ +inline float32_t vaddvq_f32(float32x4_t a) { + float32x2_t v = vadd_f32(vget_high_f32(a), vget_low_f32(a)); + return vget_lane_f32(vpadd_f32(v, v), 0); +} + +inline float32x4_t vmlaq_laneq_f32(float32x4_t a, + float32x4_t b, + float32x4_t v, + const int lane) { + return vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane)); +} +#endif + +} // namespace neon +} // namespace paddle + +#endif From b7885b087b74a1ab446f8f34d1fd78085d8b4316 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 25 Aug 2017 00:47:51 +0800 Subject: [PATCH 096/170] Add DepthwiseConvKernel for filter size is 4. --- paddle/function/neon/NeonDepthwiseConv.cpp | 155 +++++++++++++++++++-- 1 file changed, 145 insertions(+), 10 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 16d94c976e..c017241c92 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -38,6 +38,22 @@ inline float32_t conv3x3(float32x4_t r0, return vaddvq_f32(tmp); } +inline float32_t conv4x4(float32x4_t r0, + float32x4_t r1, + float32x4_t r2, + float32x4_t r3, + float32x4_t k0, + float32x4_t k1, + float32x4_t k2, + float32x4_t k3) { + float32x4_t tmp; + tmp = vmulq_f32(r0, k0); + tmp = vmlaq_f32(tmp, r1, k1); + tmp = vmlaq_f32(tmp, r2, k2); + tmp = vmlaq_f32(tmp, r3, k3); + return vaddvq_f32(tmp); +} + /** * Each step calculates four elements of the output. * First step: @@ -137,6 +153,114 @@ struct DepthwiseConvKernel<3, 1> { } }; +/** + * Each step calculates four elements of the output. 
+ */ +template <> +struct DepthwiseConvKernel<4, 1> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 16) { + // Load the filters + float32x4_t k[4]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 4); + k[2] = vld1q_f32(filterData + 8); + k[3] = vld1q_f32(filterData + 12); + + const float* r0 = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + const float* r1 = r0 + inputWidth; + const float* r2 = r0 + inputWidth * 2; + const float* r3 = r0 + inputWidth * 3; + float32x4_t input[4][4]; + for (int h = 0; h < outputHeight; h++) { + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t tmp; + input[0][0] = vld1q_f32(r0); + tmp = vld1q_f32(r0 + 4); + input[0][1] = vextq_f32(input[0][0], tmp, 1); + input[0][2] = vextq_f32(input[0][0], tmp, 2); + input[0][3] = vextq_f32(input[0][0], tmp, 3); + + input[1][0] = vld1q_f32(r1); + tmp = vld1q_f32(r1 + 4); + input[1][1] = vextq_f32(input[1][0], tmp, 1); + input[1][2] = vextq_f32(input[1][0], tmp, 2); + input[1][3] = vextq_f32(input[1][0], tmp, 3); + + input[2][0] = vld1q_f32(r2); + tmp = vld1q_f32(r2 + 4); + input[2][1] = vextq_f32(input[2][0], tmp, 1); + input[2][2] = vextq_f32(input[2][0], tmp, 2); + input[2][3] = vextq_f32(input[2][0], tmp, 3); + + input[3][0] = vld1q_f32(r3); + tmp = vld1q_f32(r3 + 4); + input[3][1] = vextq_f32(input[3][0], tmp, 1); + input[3][2] = vextq_f32(input[3][0], tmp, 2); + input[3][3] = vextq_f32(input[3][0], tmp, 3); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 4; + r1 += 4; + r2 += 4; + r3 += 4; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + float32x4_t i3 = vld1q_f32(r3); + *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); + r0++; + r1++; + r2++; + r3++; + outputData++; + } + + r0 += 3; + r1 += 3; + r2 += 3; + r3 += 3; + } + } + } +}; + template class NeonDepthwiseConvFunction : public ConvFunctionBase { public: @@ -175,7 +299,6 @@ public: // only support CHECK_EQ(strideH(), strideW()); CHECK_EQ(filterHeight, filterWidth); - CHECK_EQ(filterHeight, size_t(3)); CHECK_LT(strideH(), size_t(3)); float* inputData = inputs[0].data(); @@ -203,15 +326,27 @@ public: } for (size_t i = 0; i < batchSize; 
i++) { - DepthwiseConvKernel<3, 1>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); + if (filterWidth == 3) { + DepthwiseConvKernel<3, 1>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); + } else if (filterWidth == 4) { + DepthwiseConvKernel<4, 1>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); + } inputPadding += inputChannels * inputHeight * inputWidth; outputData += outputChannels * outputHeight * outputWidth; From a1ce705517fca1551029541e17cb0ac3ddb65677 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 Aug 2017 10:35:50 -0700 Subject: [PATCH 097/170] update etcd graph for design doc --- .../cluster_train/src/paddle-etcd.graffle | Bin 5069 -> 5765 bytes doc/design/cluster_train/src/paddle-etcd.png | Bin 56296 -> 57495 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle index 56681ae5bbe11849116d621b066a6317e003e4ca..1b6611bccfb0034a10044f2f175b56c46a98f1ec 100644 GIT binary patch literal 5765 zcmV;07JBI)iwFP!000030PS5_bK1(be%|>NoIIR|dt&H8Ln}MUDXsg%B(@M9vMc}nc8ftR)ROT)z)rg=0a|PIyuSW=P_Iw_z3X|U8yXJW!2kRnmcjcH z^_`&Y`kl}3Pmb$rENXHZF{}+eyvie_WK^KR5p$` zN(bMzkB&<$j!I>7?|tcgH;VcnE0x>Z+p^82%1+SZf`iII81!iv-G5_|tucjiJ8HjY z$&6x8Wru~fT_^hT?vGE`^!^J^&UNgFJwF|ZeX8(q_SgMj&W#Z+M(eWsC}wTzEAXqZkrCi1#x^?Ib_+7XxqOE zrlMo(9q03?7DQ3d+qFZMgNck_yVIzzyugk`JDon!K+1e8iQSJ<*a|}TN8sBY%eU*t zEm)Kg13TW~B?;B4(U11lj(2_r0sUU{F1ELVs@AMEw~scv)$aZY&|&Lr$8$RC?00+r z1YUGoTc=)?Jzw9{kB`yL-xp_lpt^mwSv~rj$<*(>+x13q_P^1q=sDE`NpbuKHiOH}so*Ps44$P4A|ePw*Uwcs@~MLshl?*KiO?6ri4? z!TKS~;Y=+RCsebYYd+%p?RX?~9YJqelCdb48YIt?UjEM-vA~DY8pIm-`Jwa=GNAdP z#D2h^sg6I6Xx+01vUtXPldWgP;`Fo_OyjeTK=fN>e? 
[GIT binary patch data for doc/design/cluster_train/src/paddle-etcd.graffle omitted]
diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png
index 4f9c9762b3a8c089dd5e9b2c07cb9dfc78296a21..4e5c3d886e65a654d734788afdabab3fd15e0632 100644
GIT binary patch literal 57495
[GIT binary patch data for doc/design/cluster_train/src/paddle-etcd.png omitted]
zbXml~Q9n+%r!eh00pv|gZH_~P!2RXsYN@Sr~x90)4s?#i8{z2qW1w6==g7SR0;HR+0N2bbzp>w?5*Idrt&@ z{qPkE@+FmFG?in{YeGfHtO#Yslw>JRfHvYmg!e(mJz^mVOm^?>*LGzc@Rpwycz3@x z@%jx+msLRJGx)IO@NJyFbdseJhXKgs9;QIA6Zx|ZDtki})V{ah0h3^Nu8y@;=jlj) zkZhQ433at7r5pzgcSoaNwOHH6>1F(`yoBE$OD!G*_KC^tC3Jt(2hPeetLY02tW4F?^{5j^?!pxD^?cm%aP<-%?@W6D0`d~%#qkKQt4LZ-(qnP7(vr*G zd2!$6=X4vl^pM}0wAO7F86I?@%P!lM!yT?6a~SYgENz`V`OcKO=7Ua(ki1t!1MhWf ziXwR(Jw2`A3HVJ(@r}&I=q`@7*aQ%(XsaK2?1bepR>={Gum_E%ao$_^)P#3Nj@^|P z{ARgU);WaP#KcS8tKC~`K=QAoppIC>n+u^Ba-?o;Lmym*P0LF&Kf6ic4c zhZ8+;P9?)?(Q7q&%IJkqPwNwcPI-b!Ynj|>7>b3{4Xnyhe%75;Iz6^L!&kCSh=3G8 z(Yu|-o$F2XDjXn@$Pm89rLL^-cn!-|-2SxTBBM58T$b9e9&^6*scu)qm=?^F;u!q2 zn>hU&Oo&r*mi26J{6%qkOeYtBU2(0U=OwW_+4v;GrKdxU?0Yt21r-~4EyOpin8slN zN=I=vFswY51;ST1QKG|XkoP_VAf9=TyQIz6hEhD`_$9KKLx}RXISD`_(gh+P$p0ox(9eAt!*cl`k(QSDL8^bur8 z+fy@Ur=Z^9jEq!qAxu|&F5t@Fa>z1l+cWH863(Gca`x^I2tm$aj)dw^F-v~ctHwQ= zz7u7RifbljrXL^PXO-zr5RCZl8&o_%@M1>ur8gShIH%a@wo zV}RfyC_;4fF1olR)m3!IxWDgXOBHUDi74j2K|DF0Vau3ZcC%jXVo=@)9*9_DgFyv& zL~>44`a6krSf54i6Nm3cn*@E#d?BF%BROBPcl> z(RE3KddG10H~FLRFnSQt(LcDaZ=2NgzauYRm;Cx5ew}OXe&t4_M)ZaWyAUm*COnFp+&5<)P1&PFiB_e}qE<;Qk;-wOBgBIZL%f1zQ;O-Cpsm8!{eALEg4wRR8* z69bRRnWCt1&tR_C@AB_3b}ToOt9_ts?^b(k25&GjLnKvHVx_@+U(66W`NO>rlrqlT|R9ELW70=h{M5P03vwq`RsrqxN(v@6S%ZA?uZu zd1jtr167Sj9a%@A3Q3AX5Zrqox7!XlRzdf>rMoC;&UlJdUIZeu?G3wxq1`h>x6rXI z&&C`rto|KT+R#$kX_A=q>ft5Ra^Y^z`fyf=77KLS2ZmJwP9Oshz^61Fynyx97DW5< zk^#vErv^()tOQPur=pscT(BRSJQb5Pyz=~5l>gm~O0|T-KWM_e6p9;cP16uQ@;uI zA(1c+_E0K2l`qG32n&wPEMQ|EVv1bHV)GK%wFfiVQnC1yz+Leh&lyXE&pIcYMlop0 zu4K|D=PiHaJ=!>uQV7S643N~nFyMgPf3}u+`YD~#mZ*z7Fw|=WhF`1TKKy-aQA9Cm z(ZG+~K>TOV@}P(|77ST(8WJnqsEr#G%r z2Gigvs$8_Kz9DBysgs!qn+&fhwV6mnbLM#A^y6f+%omY5nD#>>8r|x%m>BG`pI)Ci z)5rf483$#6vO+EEM-WGbTAG?>mYen>I@|Q+_q@i4dc{76F-BYRgs+*?x*v;*B$!>` z`*|g=N{N`S#VpmI1lEeULA>%5ltGU^v8X?}+$%vf76A!tFN?FfRnEOha}G0{n+ZHV zmv8+zYAerz&CPpP02=)8+j1yHe(dTifl-T-yF|zqUjv9aEtgb+in=Hwb+sLmWOYo4 zUvQUyZsC2~`6nj=yd)B?;WPKjq_T>W3r69~ZwVlYl`+XMea>L7m>hB{7WepjtOVMS zP%O?ihN5m<@WtQkvj>(C2$gtVBU`x|c_hz@(Hi*d)^u|=_mgP- zCwFxKWsg{$+5%<32~2GadwHT^9%|1W6C+sc#BDbj6_r4*lwTkrrBHH9Sp*pvk_bQq z?2Tx+BqnT(pp#CZ7Qq=5`omf6*!sS>J@32|!VzjsccTu^DcHA3;>pRQvk2xo1dp%1 zGN^gMZwPdVQck{Gu&aI`40))iQp1Gny{;hTTauVzibG9-(m}9x+TAG`6~mMO$HOzM zMG(l1TIUJFD}INUl>0DZ=2bkEg{j5>&XfAWCqj{P^;52b*|CK)a#0`f*WW%kMky&o z);AX&^jBkP;>G$%?#S`)@?lu!`n~vV?Z3?R8~bhMdT9H9WUdQ$9zb5>y%_WtbCdm( zTcX4O?b$15X#G~}t+TTs0NS(%=7(qCg%V5mn-ikQi2?e&9t%wo%1ZKlaSj`R$zU~Ar`kc629RGGO3Sz2T z=cN3w0LFo#@l^?)t+FQeLsb}u_u29n2bg_xLe%CYj_&Dn@2-$GRJS82zSndDBQ|u) zfC8bSvg`v`>~TlV-(pw0GDhRa^xdq?`rNYwA;#QZCU6WX{lVkF-G^~~Kv|H|coN!+ zuCu~Y#DIzdjn~J6PRL+OGl9+iu%d1PQ_9#-#8k7e$B|0pwFU|V0m`@l#>iegKTYf)56aO}bETcXHLpsBGHW-ldL~O^``Om}g4sI9H`Xk9tkRI?lv%^g>$Tod zYYMt*jtH$=DyC=vmN8QnPF)IygBB>*@dAbr$g}uAFspY}$@^b%gMFl}wyw(g*Ofh#C0pKM**ZGmh9^`)@rMt@DcU7j%aWKb0@5&BtU!k$n z#5qZn{B6fa5a-PpM%FCz>Oq+UmG%C-bCs&zV+{`DcDNE@+z0X-pl!1c}7vR5i9{oa)~s(Zb(Sc z^IJAY;b6X8E!1dW1)(nNeFvc2{oOH_?qLh|$LyRv#NY7+gr1V{;vH+nN4n@y4q3}Q zB;}dk>H;VZlO^xUWB=b(0|nV&d;|K{Tr&bL z={KZBxr6u)KfYB=EQRaQ(8IJOYEcWA1c4zIfVT+-x6ofokg_7?U0VcfUR7vMN+xq` z?Yzs-2g2=Q$xyH%uiyk;2?6#nF#b~@OVfU@N=I1kPXkB&^C3S8%v_iUz=;S{Sy}{C z{}DKt{Gk*uG(BElzF6I^4z^`bz{?wim}~rp3=#!Sa>QX#@z}qUtOH}$-03d@>>A2* z#BaZ{uI#c2!bA^Y)RI=aGV*_R!8{m_PUd-vNrC4c*X69H5Kd zvmb0KM+uyo%{AeT^xYL;`qe46^b(cD>vR!w4nM>9XaJPJ4UfN%XqD>XC!aY~4O|?q zf{bYX-~)YDG5}(}1F``?aeN%>mqWneeG1A@g~XOVSeJmR(WfSsVFOFq063W^7L4MY zA0zJp+Z-ZxYL3<=GcIfGgA{EtJebhuaSH)r)i^l&31ldZ?%RgWJ@~+>p8@NTa@D)$ zfGYLs{w2VkT0jN;$$jS__m}N<rjk-N9#M2(r#fQGrT8`k z#X^CMyykvO=DI$*#yYp;u|0`ikUPRN7Zq0y%N<^a9d3gfuMXhp2NN3B19tu0cfNpu 
z2#VRt83{1b65u8Ree*jSpy#w47M-r_c$)dh9hMLix%EO_}r7!$=ofq+35KxepL*d zqzY23Oa&05YY(7oz<{DA9z1PPj^cKfLO>qsk%UW$137TP^g2lFO-YG_uK=?`4N4nR zMWbo$0EBrt2N<@1HF|x0JoaKGoc@)_jaP4*{O5xUt8c|V=Xxc$s#n^Ikz6 zki#7lLLDuDUMfd-(-mb_V(?UP@bYTEO@=rUO>6mWj4))0+-C>gFcF~ZM%SAKZ*twjCB{_*;PIwG zExhmqPzu%~Jhvvoi`l#mmii6x6w!2rTPUu+$T_Alf<;Bu&x$AggnaVELwAy^m*MH1HH^%8% z3kLy|(|ehZ_M4CE?_CIjvbvy-_sL$>5Q|`sw+rsxRQa22hK2dMg=b8mfV0}OOOjLV zvL=gxp!z!Qd&fPB4`Ra!nfDsVT*7VI0rP2;>YONs<0iX)NK4?xSY+Z>>!|uA{cQosT~LTo&~T%{NR&XoWPz}2Y_LPC)!&EAu2^w zb2z zb>AnZC5x%rz}?7<2_BO9A%`pPzmKeNl7)bsF7f2;zJ$$%g{(Qr}ZJ+7P3> z6=rCVPx~d*XM-S~TlOeKft}JhOlMTyEhsKL87{R@e1aY^j7mNL9O>H3HAm1@m6!>;`9yGJyI$R#wzUa6IzEMQ+;s!aoY%TR_hzHazp&bly z0?9a}MDT^rFp)WrdPGUT69{_@-aJa7A>;=wE}ek0=EP2r3RoF;^oy;Tg=IEKrB;A3 z3Ek(fB}NQlHG6>JELWiYI-+B2(9_O5`$3BYG6YQBhB9ou-#c)nm8e#aS$eiB(#5-6tizSfyu&tL! z=E-oF()u9=WEV8BTzU^niKG{@!&YSHpuTQ;-=)&ZIIL618&Jqn@978NRRKHhycH{s zfvjyET8T5x1}7mkdkoytvU!&@BF4ki`r*3SK{Th@Q-T!|tQVBohIftFmOlB-2UMac zTdI%QW%1Ik9>&5cF!Jd32s{8QJ77EoaL?k~W z3Rq&Z7{tR#Rjhg?FlQLfAlK~lApq0Ngh;c6;yT&a+aXw~Ea5q&xADX-R7D{OPbNiE-=cV_>Rb2=bQrQAnDQlU`i7~X3$$oJ{ zF0>lP>t~35x`h%vjEiD@UeYDU(hw9pc(L@c_aVO#orG;kw>KGUTGCXCQZ!+JJ=W5v z&ccY6oui@1><)?^L)-V{j+2=lv*Du9saffD(=1o7Ew&uwbyLd>z^cbJ3pqtJ=tSxS ztyHh7F~~WQ^0hl5_Y9?Mp@e>Y4{@SUrhZ;6YmweFBwa-I0Kzksq&wlwHk81B-XWCs z7+2rK)Z38PW*o6e8u&6v#hh-xmCibP!5G$LawAuFP_!}Wg77;B7@*Y zF_h{RgqAM7$ZhhPk;8(h*O}6^NSuu5`%f~kuiXdxt1bXTbG{y^hdy0dOw!K|1#Cn0Xn4{fB1H6H3E)8R$Saz zq(c1W=xi*C4U#A-dA&Es;G4He2F`W06S*4)lTN|`YjP1uHgl1A!&>28RlS>)7L;gIx9P^By@))V@`)j)udyVP% zG*m&{#ate=ArymNwR_It=yw~C>j3^6cCXnRyFnkGVdV1llNdUw8`TF{`yTxq==b9@ z#SMvKWr>urT|XBnt#hQA2^1XQuhu>ez6RLh{gtt9h2XG`_Aa9xDX5ZYbGe2-lmxKX zdEXX%;`L=ML=6xdg!*Q84bL5a7XnBrNgO6Bf0{CyQ_5~cz7i_ zWE^zZNOBUHtziw=F`#a;@^3Bu6ubb8Y2+T=e1q^r#CZpT<{< zzx#cI$L?%vt%+3U-N03(Q38)arfR2#yqZ01X^6gfzgRo{gqL*eI9F&nO7o*wN*0`k zRlgBt91(pq$)HLE6(j_@>-@Ru?zJQQl@(+*tU$Zw=Uz{-1DQ@x9^ocBECBjT0$@nfep=^BB6E5UI||)k^eJw*y#5lQZ>Y@Si8>!59((hBevU}+omT??;Ih>T zB`?xptP$GR%G)n|Cyr|W(HR&~jm9F8qywtF zSf%dC^p0(@ItzEm4e8eLbJ44= zz9d{lv6QoC)@1lG9{hYHOeBx>?S5>4*yq7kuSKfaQDTMX18c6As6fC)u0zn!1Ob zQMNN;angG0;;V9MM4?EU=9M!aHn0QxS$4JocJ`e3dFV9Bi2Z8hIhpBEN8(y!lwdbo z*L|Tz|2U2#dk@n>vh6rd>^thReQ9PJjWG^m3gfhd2HC95&^w9k@A(E0@j6ppp0j^1 zL43#HuSVwt^(QYv1u5x!+;*v+45y4duTyiz`!SCQyiJEARqpDGKNM-NnbZ;>4rd>G zD$tWvr5z=Et>d}IW*N#E>Lab)JmpTVVD!-o6isHCs#DbjB!I)CWVbxa8^U!L%u-QgH z57}^=0Qn-c7T1J2DF$tl+l_SFekO*qmLf_%`K~+uCQL<)(hl$6orrWK(EIL<8ouUD z_JtQ!$y@40tVJ1OBC+nIL)yv;J`B)chWCr(QQXUZem0j_*5;)AB7A!0}j?k&j~bAKw0^n_UR<|wxc;e7lQM5(!(hEd?MiLAh4!1 zo;e2p?Lw9?QEjAba;H-F(n_1-y^n;(3vWEd-t60#h^KQBZQDaVbzN)S8(yM153FEz z7x7jh&OK%2Fk}aMcSlCFRp*Yk&KHJ%;Kn>Is6(Aki86!7l#YWde%`o7tOjMw*=Z=^ z&5>fP1mQdea{ucj@rxpWwowWp5+Rz8Y@Z}1Ni%>EaXgH>QV>{b3En^eC6K|wYNXBf zu9mj+wt0BllTEGckEmTGnB^{jTCvXH8BRiX!rzSiFvmiQ4T;8?XYdsVGZh zPio@z^!^7tOSSO-5dqT-P{Pix`i&O_CBj}{Upq4WFM&!kGg~^vqKin?MBF?MuyifWC--9jxF{WI13k zBrZ+=i!cU=QMQ}j14OllL0R1!;-B*s%raEmkP#Wx3zfwpVl#V#8vK6#h~<*yV4@`y z8v&Y)6_9}~YCJ=4#jM<8LI)$w{W=2`A9oFN zYTnWUv7SN}dh$BIs@AZ~As#835od!QlTzIpMl@cJyQMn(DX}xH#2Rm4JN1noUO(li zdfB!HNrE2MAU9c}ldubQ$g(++&J~~OJ%}{ zmjP(lAJ4{#A;qSylK#7SDXd0Wo1}UiOj=V~r?8rfs4%S1{-!K>mgF9MZHnb~m;B;% zypFSe1R8Cc$cq~Q<8mlF6@l=TKVgpLi)tdk?6a4Uf=mb(UIrFnti}Jn-{1&$D?m#v z$FhC`@*Iqw0;*?LxzSz#W^@FNsuHeAsW%`WXaazw6IwIq^I8PaInL*Mv|gYLNdb6L zDgG22fHqeHOap)czkbXSZv#AdPeu+WP)u(T)pMtLuO<_W=J6yuSl8phsPzTOl{PP^a1wprJS;8m2$qs}+FP$OM3jj(0di zAg(Xy{sFLa&J^Pr;El9^Rl#~|hLV}5N^}BK5yYIRkTXZHV)^F$dr5?zxn+bW$=_~o zwilB1F-VK~3nc6^K25}Lq)6R&XSsXgLH`^(s2%|pB`I4ZIQ z97sG4kDY1I$P5E~-Bm$1IB53T2Jjsy7%+dLLxaNlgJ8WvU6BHy9UD;g?4?R%5+1Ni 
z<^rVHe1Ea+mkfR8$rmjnD*YR9(w?8qtOE`tXC$6`%Rwh9un{x@!~-xnC(u<d*+hqixX;1`9sosG-Rl8V|`G%uDgrN|CXut~P<^cXHFROacP_y@O$>s*KU;>dh z?DYbm>&BStewVAqR+i6R$2n@+c%!?WY%AC)2g)(6I6A3~*sq<}nS9RF)cL?D!d^M~>}-`Db#~Y%gs9a0FIENfJiHilvSLVYgj3BiL5gAPfEiEK4+zD?kan5EAHJ_RzRhO=E007MJq)8aS1 zJL9zfv|VE{LO^lp$|c*E>FMk6x35i2DoXYlN z_G6?(z$d#RX_>K1H_@+@#z_t2f3^o6U1AZuK_FNSK=W;&xeLn#$ad|c9(Jg7Rp;aWNu!- zvg3e+r%0t!fkuT`iSHbS9rn~zyh*>3KMW6cuD!z8h{ZajtgyES{ zPlQ9K(>_`uL4-Y$)vQ?)bQU7`BCk#L(KyqYLY*J2qbdmo2I7ScudtfERGe4lmf}z& zMI!_xqEuads#drV3Mol;3HI;O%Nuf}n5WM)$v}-v(Nd+v4o6MwV`d>2n$7fdKPej{ zVe#SrqUtT6s@lHyZ{pA(4Tq9OknR-e?r!N2Ns%<@?rsDGRJubznnNR?f+Et0f&vyO z>VF>Z{oeQYj^S{4x#w*5UVHDg)|~TsK64GL-mBSYL06$*jE!?fp@Q5)Bnf%@to~)c z{43V3(|O};8n3a7FCJ)8np-XLC8F02C>F{=vCy)wn)&t-rv6*l>Wn_lF}&2sgTKeW zMBQ%yRTH;&{AL1e+S@$S{LaqXQ(=T#Sxi4Jhr;DHJqb46l;4q6>2t7kTAvO|?8wM` zrn#CgWSkMJ{D74`A^I6xgSzbRpNfg)=|Z%uIrAkPfmVq^RcF)YDkpK=bUWjuXbpvXs{#k^% zasQtBWHR#AJBvE2K0@r27;G}9_%7k{X);2n8FchGlx9up!K~zYP5LMZibSa#H@>X*LPvIl=w2N!Tb>UEVGCbwoGiL!+vyDJF3bM&?at6(H9br65tvW=GD26t8~)yI-(_x-b}aCYsY zg=BN^NJGI@q^!nbIM9&cQ{oA3MfC&CY32u}!2BrY9OEr}>=7mDm^& zVA_euqZT-Ocj?80fhSXUj(!cT4YNjtn|Mg;#4Xe@=g()A?X!`hGOhnCxgVZ|sD8YJ&9|-W^Bsrco__e_NAB_*4?0 zY-kC%LnqG~Gtr{)8odQ2fcvT)8*RJaVFf42d=w zDvcg!aRl4=ki8qvmrYpplkYkz61LozHn>=ar&>CxruQ*9zKvJ~+L#;G+yl z;rcq(s)HT2Zy{|VYvDI(CLI6NS%3tWVkAzwyYl&|%B^5hZptuHCp){RY(9=J<~NU> zJk^4?_-~Af>KJE0Ny;H5X;F2e1sgLVb}1>_1^HZv)^9Q-P4xv*Ja+77OB3rB!KLEx zO#=5`YHEEgLPgrKw=<}3DqevJ8B)(VzmROYDpOS)8Vol#+)Bu~toV$giTQBdO;OGx zk>9DvPJ^9nkQ+NiU-tBH@NUyj6-nJch8hUA5pMAbwPPISVM3u@2093Vynzln zZ*bf1x%ht(f(-?1tMk5?265r9>(lIYT|eg$eWsVggZ?<1!4bp{B4G)4rp#PAL~{NK zCgJB~pkzw7JU~qQZ>)*uDq%%;|B`xvIH1mQm!PDlyv3mfIIQd;@~oI{*igNs0s`A=&4H=Ylr>$Jz&f%-} zPu+VLgYp|NlDt}$jVPu#AJk%AR{vl412hlk*M(nUyVeFcN3hiMWu<(e__;`a3Q1NA z+>-j3712{bV2NZ=c;5RCvP_cGW!HR?Ppb=i99%?C=6yLWD1J3O`RMpl?wh&m-WC*L z!qizUZA8{13me#tX_Nt1G3AO*x+VAJEUkmQq0&jdSUq~V zwq+(CAYuXn!&TKs?HAw)y{)e|>H4Q#ymA5der@zb9(DoGR6E4&oK>@j@7@!?zBK=l zTe=&^^wGa3L3KHz?gG8zlgA(gsk}SyoF?Z?Swu)F)P>LDOv%MfL#VFm5a<|ZZ%tMX z(1UlP=Znv*`+0>ywv?T#OX^%p6L{>@=n}xNnM{IM5ls66fwfYY@X~UZpVNgtYD0`8%1$ND(?w;VjdShArn}k_@)n->Xbo3PoEm7{2BPN&V=14d#nYjV|s*^|9!XTmftmr9L#+H~B`$dWjNW zSJU1!0YXgEjpj}4P$G5SG0YWpX}#4Bg5!|{%C~=>G9Or*Mb^eEry$2PO(#xLG2XH# zcbzhcgqH@T=sz4p^F{qQzsAz07N7KBrpyHJHQT$c_|U3^>?qNk*xswpf0s$8=9x5ejTKFL^f~0 zQ6+Qt05$YP>f@o`F6RoUDh5M8Lg8NY_t#vayhiXHsY|Q_XTY}EQ59%Xn}Tw`v-v9N z?>SacVKPTN2qL#{Ai(~jGrL0_lSkPvzed;9Z$-F%1Ob}cN20q0(@_p<(-5~sKivfm zN<{AFb8oap28={LW~ZJae_~M!RobFIkx~#v$sR*t)=^930xoPpWv|jd6u3(Uc(O@X zYCxBa+d3Sr58^0j+QQs{B8cFdsDoHIm&bVbe$<7Nz|8O3c8qBH3TT^Vz=Nh4e|`sM zj{+qrV2a6p?81KD4!BTI9R4s;yRr&_hy|iH=+hcx$zDUrzgoZSE?~Ac(x^R1hqFhq zDBJk2OFvRS!O-X<%Q1*XWWFZJN?-?q37d9tHorAZ^G;W{Z1Du=Ljk)mp#`C2_l9&1 z>u!I}(j=O>&bj{Z5q6PzPL5qFHOH_+*3#qe@N!t5fBSQ{PN2VjT8p(*A3`S2=Z{^E zQMCOBG-)6Gu7y0{=tr0d`l#RLyHXK%4L9j6#5HKlRZ`mdlqW@N=5hT2;4$3_ih8}@ z$n4Uxx8BD@m}E;6*gt3Y2QpXhpI#7NNl_Pr@zi-kJyzA(9arQ75IY~pAgxqS16nMI zI|d-MQ8&{i_DQo?-KW)6t<6}@bg&Kk@=U${DX?NgVKd8j7oetX<3C0bOlHl^rvSRx zM$1%!>B0QHXDN4;1?9q9w!d(juaw}5PJhR#wqRd20VYja_O{Ey`#vzB=6iV^zf*0i z&Y4TyCuS5S|?=pj$eNPY2E2Us}0Q}wxx0sso5P@_LeWhpzpEx5W(;)PwGAEQzo>u zX6|w+??ZuD3JcSzyoG*Z%{rA&yz20NQeeSj`8@L0`e|@c^sliC!qZ1xqc5iNGREtF zDvpy2I(zJX*VdYk{3)jpGd)oPO*t#yo|+S6dP?7x&J%GyZy_~{``K>FQ&RA?;i9or zQi+Hf+1DHIGj2OL&uB3@`Q;8UM1E&5VMBOJ*dg<8CH*81dIL!}1Ed8HAA6&i&R6c{ zbUCecngIJK6H%=*`Z+0{3KFV^wV_usOoTg5SFWsI>^DY7O!VpjrKEel;~DF;w?rxUCXPx@?l$?9 zH#ZGzyq`7TYP7ba<-KdImW&(>N`IzIOq$&8fHCYli${y&BOTq?wV6m3XroJb2@^L& z*mprfr`MtepXD80o9c(p9ZrF7Eks506VjgL@#UKfclp1~j3HoLo%Uv?nsuzL*5P}! 
z%o40@Urm&8n~k?6_}2_Sdd^!76p3yZT_1W(N$l>pH~cWyod{69n!PjV>|khrgM?Bv zZ*x(Wsbj-Kf+pc*WJ<|s-HKcf zG-Ugkp1w@3eskyh8{!lupg>CB&{3;(mCNYlaW-rJBM%fRnmGox0)JFILl0#bwb#0Q zbFTg5=GnM8DXE0oad7yC$-o2Wwaw+$q}sDaF5UJ$9l+5o8-{xIRa%^rl0yYA&p7*N?*oHe+7*C5^nLDo*OV zzrv6BD&~0Y6GIRVyH!zco1EIPX{`aAk8pW|86~=?y#baA=>f zO!QUko_sZ5w7fn~tcx)55k3b=Cz;!E;5~3F`XP*6K^QNu?S_?v>=x+6?g%A= z3w#vG?_GEC*Pjd(vYYkKOs}tP#bo7)eVQh|hOOZtEQKxP?T#B}heRc5CU;uO^T`f< zsQujg#cZ2bH?A8jo&~Yp@9pqE?9|4HBYj9?C4$MzV{~>M4Wh;|-Xrws znI$zBEZKP3GHFArS&m(}rsEvs?aG@pvhp-`J$eXqZ|+WS@WeNfYFS%pJ&xyqWw2$ zl-C`PW25*B6qRqcb|nRigfQ6)D;*7vCF|?F&$WGt#7Qi`cuKtX7V&mE*}}P^0dFvk z!YN6<(eb<0hgxp4iNrZBbl9&M=42ax@os-X+|Y*>9@4Ql{Naar>R64t#9S@wWcTQ7 zhN(s#c&`ReOx1-I$;SFR91ItxJ{Vw|F^=dA8Pl5gC7xU1p?SGs8EG__mYEqMvZ7Gh z$fi9V&$_}teeK2kUcpj@Dp9Ybyo+qZB@#Z|-DiNX&F%dozvq1;I@RilfkBV%^3ue-ir)eh)r% z<8J`u=|+pUhy|}Rs?Gq7lJ22oR=v5k3RK7I);2@3;Qhx z0<)16YhIiu7kC*9bp|gEYi0IYu%o!Kj7bF8@u@zYNSldyYkDiP|II32NYnU|a>X1; z<6tDimaO_2wNoFj%)yk@6`Du0RUR`jQ!2KtDs!Z(=3lxnw#4 zm7`}Axf{)~XTMu+aIHKd3M;BFR>2bva^h|I`a{^Jd>OF3VE^s1zddwegFpHjjmih_+DrUAa@H4Km9&7In zZ*n)wc>E+<+x4ibLtx~J8e|-r0IgJkoWfUBwC+=eo#mholFeDOJLRS$uSrw(`SuCJ zPHr2?Y(wkHC9+?B`M(0GG*gld?CBjow_E&D9qb~BJeqL9Q5B=v@=Q+J_@*K7$)boC zLAO2B`=ruIYGhW$u9d)HRFTe?@X&>)kD5H{+%bBz*2+NcqgQEg@&c9nsQ;uU-s3S1 z*CJIB8;zF_M<2(ZCj~1_l&2$aj#R5+>1ef)b!!p` z#-NG2R6>}P3d{SIvx;NuRJUB!iWQEiH&QZ~6$l4E&WdoJ3yB<%xdn1g$)jJe$@3^u z`E@$RTj@iu1_!cUQjO43--;PivKIP6Q(NC5k@<b?9k-z^VXkOQ zbN(Z@VT&+cA%k*#G+B|LgLQBwirImg#I6syDCBc_O!(-Y)Ym0rzr%r?7wDAV({IFl zO;53|3UaL59s`SJJxKhzqiQVLrDfKL^;Q5Eh4uK$bs{kud4=1yeoji(_x(@TaJRpf z4BZ$WDE9G`s|oI{&K= zEejU0qR0Jo>dHocYC#+o+WTc)exDa{yvKys*1A1PIW@FgWkB33SH6IZcRrm?xgmm^ zIF96{Xy$1QI-ro`GoiZ4@QA!oa60(LrgYe>&Nis@_FevW$Py=tT1R)Ei45mg`!tqArPhRTFAYTXVf}8(A!pl+u|9 zE-x2sCT|f4f62-xRXGcVaVj~?yUx9{)V)_uG}~UD-#yV3%Uf_O3%}jkGG3g)UfN|5 zWs}1djx3xMZxm|hxlUhlh>DVkAo7mqf!tq_G1oh z^kpg}qvzow2Rpa;Pb9*{+nNTK3(41liHeSi7frT#h;1#lHoq9P94IJMlvnj~U6EB~ zXhN2ov+)wfv(nB<_+SNg+OrE$=_^d}-;v|tK^P+MF|)tk^0HQPD1J}z1J|#$oG2sc z;fBRanR49iFh`SF@s~EkJ6qb$%nh?ZVixX4^edm7I!2Oy=Dl)fqccVV&nD=n1F0Tm zx^0aLiwWX10X5{q)edU4st>dOrNJk$`$jK4qj7q%Ny$rFU1(7gpLV&4udmYqlQUS~ zD?lNBBQo4dt3sh{?BoUYyuXjg5GD_{-Wg%@aQHVB-A2?j`Fg2apj+Kf{+nNSJtnxy zPuh;5>p*nDz@KJBba(mnB6K=*L?1&omu54KInM~*F6Vf7w*HDzlrh{|r0`6)^b3kg zAHG<^7ZPH+#s79m%hVg4O4mq+DblewTV@*(Ij+!`l6JL9GH>Wi3A8rX8nR#UV8!Ud zKU-hE?38hLCRbHhp2BRo^Zl#w2N3mEpsNO#Y0$8A=K{F&+=yBk(D2+q{~lbI z=V0J8puptUx1wMzfl2^M859jVn-yL?`_-D;Ml|qA4EPn8zG&Y&%Qdtt@m;d;`#@CU_ZZpprva>>n8typ9hz4zz!6FO*?L^oa{`Q17_H43`+dtGF4E% z1PYP}cPlt9DQnF}(ege8HQ-eXvGzVR5;KEL;6yu!N7UP0(ekfyksJ`zVqno_TN*xn z7zqUq;NP4)J~qq|pTgrfB{S{W$+ZRn*iA?qf`ZO#X#_-m$@GnYDu7dhZ(+8-{|91% zrb#OW&lHY!Z9w-Y>JKC#0BR!0c{Bp!38S*$g)XOrBgEYE=T%4P35Deec8nb1C=>+l zZ|W$>c_QFEf1@Q%e;Nv`_^@Jtz+Fq$`1l70mMYktYv1``VgenNEImPrDo-&5{m2QT zT}%>vIA{w6{3jausHtEKgZnoN41aT@@61GU_WuFVXQJJ+2(kY)uYgZRWD=kxYn|1! 
z{xRL!BihIsz?h((OPs*Z z4`tyR+0x0;HusW@L{s+$f+k6YhNAS}KUO$P?#Qj2g)_+hstVJ}{@_zVB6^=24o}g@QFFEs@$I?~7=ImOVRKnd2~)Ob7Rua10%nqYFDAq$FlT)bO1E zFp+T0t_MXMpT;XWW(7#^y^0m(Qd2kb436hzw_TT&4m}2gRxTw%@_xnn*=5( zRCa>`3p86>NB^2ieO=zT7I{HijwkNV*x1nRy<|W2-URjFHob^*{0_aaqsrD;3NQ4) zg7QKHHlFu#)8OWr?42K9fQPnO+YF*pbgg!j7X}DT5d`&LOevZ@=$wc#Ns#gt(B76f z&@DoQ-V+R$fl>}b(I9LQont(^1&uh^O3_U~K*5@R9|Q#tq)$-npv;;BF;$805CI~- z8_g1d69O;uz0;!&NM(OmX>|i+biLT`z<2W-04Tl-ZoI%Zw_bu>=M-uOxBihlL_8os z;mCUgOhR|8bVV#=ZYt2bAwT>Xl$te#kiI6}`2w{ys~n5gdHrZK85(jZ_SG%Hx7YY* zIP+{y(84^>aDi2jvgXKF-2|;I19SD}5fte-aQkjS!ryxA{L&# za`*gr9F0+58pV}v|EIai6Ph%7@%QjU|F}yf0huw!-&|G!0Uz@*`)_G9T?}SMt`t2w zm<>g=!`qQ?n+8wcEIl$tvkmIdU2R4a-b3kc4XO|qq}DMWk5hgCe#jVcub2BQ}g zg(^~o)c6(umXgyEuv&23=ShMi<>Ir{_4*^-eh6RJmv{%j;aBZNBQr+>hh1CkB3 z0(?SrgB*O3B3r4@FYiEmQUUrl9S&y~JTqT=%=mNG@?3aXwmBnoJ=%j(`r87qYVbOo zbi$fFJcFAZcs9w(^iJ73~P7om2oBW2se~zusM{H{-C{`e{#!u}y-Fi$3IA)S0K{CrH~(LwGW67&X-PNu zSP7oR|0#q;w7GAxZmum&%KEhxB{F&`<*U04#E9aZYyu)8*YRT5N zs*&A?3VoY_)z@Q_n*c85meDK;sGn6WFUHzW0694F7Y5 z+yeZ=06568wcGj2h-rdLjZrm2D}GhbJt&k;U$w(kK|iDm)gFZS3USH?cx%HKj1O#dI(#OyOKK@KGK}g1|%Q z%E4j-BFC61^LE(MIIF|h$m&gk`1}71s6h*it=6~s$4+bpvxsdmzP-xE{v?IlFcPDC z77saLv?9bxXC&MgR6ymPDfxivXACH&LH|Ne7b z@_1=D<9ucoaeV|9%Moar5eC|Zrsd0Z6*4y0>+rMrBHW$PhzRu5DW&`i@B~j!Hm##*bqX|JPTaQR;`2Z$Fgh&4-iVlrD9AOATxddr6P30jl98o zGHm;x+4Tf}>zXOLvB#fZpY8>{@~AoRd}!7FUKf@ISi!hxLx2lVB<|S(^YV=3Te*;0 zBNY}4ELda1fQ|rbOh2eL8{1!4p;ZF04Bp5CfuV;f1Sx-h>~)^}`IE7$i|W|XO8{Q^cD5Wu1Vh4(=CzIV$6*Gi|z%VKa%PXlA(1ZLiCtN9asfSrpf6iIlU@HQB+ zkx@rLkI+=+tG_|A40`ZepwuvbNj&lbG~zCfoZ1DO*}8)#?5*w+SI_UPT~pH9H6`pJ zMqe%aeH`?96(6lo?n3kZBg}sC-poI4KWp81z z-1nj|+K*>X6*}BB(U>%C)X^+(s7#S)3HUB~GlsxgbSaJIONZqY+-o)iHbig&@i#X= zbDj?)vu&XGeO-+WBu#+S0y0&*I^G!iQGk|&Y+^d<+ch$PRV8a zD5w&B2bXfk7nsPmKsUb8e+?AszzI3&*9!$d01&Na-V_%vc}V-QL%XNz(0}DMgv}s_ z>48Z)>o+_I0474_X?1$Pfi0f;)WmgwfI)%uiuE19ltS%0=$rN?&<*}T&cI6M-Y3vF z!)BmqAFRJDuj*inkBv**RIkDqw7(GJnvR~0ufjg1m{*D<y`%o|)el;? 
zz(d*c6Mtk+5)8j!k;B;xT070Vbr9SNUUrDvcv8L#PkRwJ`X(=-uV{s05jyfWfXODKpk zZ@|VpclNC@h+F2BNpIDfptw0I8hx*^y)pSHgO9JKqE{q=3E%u*V}Ou%+_ief$CRA8a_Dx!wNhvB}uJF6}H>zX^OJ3&fLZ z=d2l8E-DKSEA}Q>p(XSYd<4Ee6CyP&oRr~qiuIPKv+1&33(L*Fkn?Utr>Dr6y%d@8 zTkaLnafsLoK;Pr=NI-L!-e4++5D&esd%i&P`^Z1Q(CN`x{#no#MDt#da|fO9 zgdh2*{mcvv_lHI}Qr{t;usT8@fop>9iw-;TLA zr|=c)+z*}^r3d7XteVmIvKJQ26g_ky$B)9uBltue*b!%VZ}9y{@C}Lm$U{R}^kRtl z2SIqb)whqldD}}&W}@EFV(9*k?X4+nybCJr_6ls-gq+U%%h7gWhvFl2Y`8mZfH6k1 zNgCpGq)1V{L1|VC!YZ1l z42FjdxNWr6>?TXFkv6ogDfTDq;O=}*L*{nw`<}dh^A|aj3tjw$KvksEJL*Uf1YH`r z4g#O$25{i$V&v|KG3}OE7TxDP&*`APR~1B=Kv27OrN`D&5!T!AiEp5s#ed|FUUOkS zOOH_4aHb*L1x{8VZaJ}+*78WOg5z{Tl8E0Hm)kw?^61O#bBI)0;U~IL&#{vm5{wFG z9)hnWn8EPsH}>28P+N@eh%ScCJY2Jib(XoaSqTy8v^KtjhU!c%Gl5UIl!!Xn!ve&G zdWnb%>RV)cvu3T-*sNxDU@WMTFqJac4yywqeM!6^ElV#`v7ZcCEhIc@k?5 z%FhJUtpcQf zXwN~*6h4+R1DHcR^7|Wl-d|R#!~e!6v#d?VsmGOEP!Q{ng?SF^itxi3)89LXOt1dk&iZh8xmf3u@ynAMuqY6yf{h zB@CzJ6wzkM+W@box6znYuri(@R-k3tnx~UBmdqb}M_`&CW1QJBNz&fVjVBD)UL0)> zkf-Qf@~`yUM{zZ{)mirbrWn%;mY4Kfi^^~up)%e;w_q&FWZ=H_?bF+`Vzb=WR}`I` zTOAVG3Q$5;o7Z1N;zz64Lr?7Kt$~7Rx(~g}2Y|#JF1bXucO6n+UY~R_hbZ4%)Gpo@ z;2?i1a~zYK+Ko!FbVwHZJZC8r6)D|ZfAM@D*e@AXq6)!qN^JoYbbOBcy??r<%0?ii z@n%8TMD%CEjvKw)u1<46gNwG#@1Jm2vM3!KSkvU*9Ci$RMPZkoYB%Wejs`pXwB+exh5B@xe=1af_C(HO;S%E=J$liU7F&1Snan6nX zHDUdcXV4sTc;U_K7Tzix^7zDox46iUM1vnof6AJ-cfJCckMEyNt4SYqlhHfO4Akbb z;zCDcS(*G!gPWXb#5QHqk|o`Di=lL0h$qB)X?VSx_Q^`?s+sqdCH)<u1Ig0q0KE!mN@C;2+ z3v%Sn*6Kk*rT(JYLC^TrG>b5CN6J@89+rGh7un3BpNf&rMB~t}MBvS;|LdigjPom{onQ<63`v2ZCK8Sz&xg-t7+;`()`h@aTs#k4nm z56``t@WO1$owbc;-d~HfN=?VOOmlpBV&^lNug}KFH%rYGnmD@%XR{SHvvJk?LI@pK z)(Zwg#Q8g4!_CQqZaTBQbxf1-7fE`jc^*n&(j4d{$4-$nkusjbp#F}!B9T^<>XJy( zbQk_JeSGn?@2KSD;lLPDB2p(M(>fzWtBrq;QDPFtWd4y#OXs^H@hxEMJ#Lozs}Q z=!lkL=p$!(pl)zT8=->LdzE}Cl06OIvmzwk!T(r4 z!7|CWB$%a67g6x*Eoy_mgN2ar69;Z@fO3@IwQ9y3oi7}UH&A@$)cz?)JQPV}$%Vz} zSU5w&?!zp1sksOBO^A0PuvNL>u;>_U<^Rx_wTS$nKR#SdUE~KMWCU6)B&!wE zl&Zj)qAoAc1r6m>J3jU;6|qQjSwh;Wnk8g+8cq-^bD@&$1(iM}JlGa&(3B_C{59BN z*{-UDF?uj|OKkxI0#6ne&s;rh*i|{Q4XlgY(8K2$s=;@4E5zUn#OG=w$Nqv$9e*fh zMpv5L<`_-rboXqN3>JZF(_M6lqD0{gInyRsBkPoSWl`KJc5wHNHJlh3eVx8B z#K?CDP}2K4_`Yre^SzcXSSD}Kgsnm$}>C#)R*l{|F!&+U~(yGOLmAY;X3$kKB>6%;l&-;YG9wgB3(=s8vXdF zrgSon9#Vd#LA21Kxgfjjq^@ZQ+Q7^|nqJ9oPo3C1)llBO;o$SvZA^NR@ta*s-BqlD z(Hm2+^9#&9G?w_c(!kAzb-SE#aS7(#(;n^@dH1cn%Ou@byM)cEH2U#IQ8SQ87PuJ{ zxZ*-vjq^?KL8kOQ=^H5uu8=skz0F)1yy4oUc0wTvB-wuEar5nbE13bJx?sP5=)DGJ z2neiZU(PUyUk{j9#A`k{41*l>nUHkr69?BVN(-K+`|`js#ML;tg6%U1u+3u~)*T3} z_9}5YJiyNJuC%`6RpP7DSD24y7tSWdry&WCrp$*ap%Ne;( zbeGVYYhl*Up&#n;Fy#*ixbh92gA;W+V1{r0BE{lYlqvOlCCCscMK3mhNum7P<)ukc z3-&NTCqnk)bB7<4rcA-KDthbu^tfC?E$5#{`vJ#5RBoL2r0xdii3`@Z*Zp_G0tfM? 
zV3HiCUG)kIrU+3xlAeWNw++C?BhsAKjNtW}*ehKQ1)+Et1Dd5-G_BOP!qva;v9Jva z^X*n8e0urIG2G=#01BSBfIK?8#a&RZe|>TD-(Z(C9NqIdW_$Yf_betu(&rcGM4}bA z-w$5XoJAAf(`=d*(x7yqMsoWoQP4)1Rfrk)4j43-M>^>sV0{C2ge(`MQw{D!yla;T zp*N6Wb@Xxo#MIy4J&XDj=a|p3M7*KFBfl}g^@%i(*OlmEW&9L`y|^Y=95 z@Xx)e#VMI1NgyVkm*TZ4j`L^@ZvA$#7J0+~f$yl_%`)kmT;|u-b83~))Y^C0j21ZF z(#ICTU%=&x_bsW>eAsVE ztGqO1%t8X4IGiHV8nZO6?vP&G)Y>*;Tr#wKTV2{}#LAekA*GUFae7k}P?c7~Xs z4yRl?aH4tk;Rd&xW@FQc7%4<#R%RA?_e@Zi8v{o@sy>u?kuG$5GPYB}lgMjkh%xc; z4j-DrYOu-u5&YOReG{F_4c)u+Zi+hw4gs9WslhS=)Oswh3eVJg8Jy2fafL(Cj*cZh zm>D)M#(Qt`3u1Pi^D(tvBC&whp|u^*7U#B2?(+beXU?7+VdWHJ;#JU9DhZgIzLQol z#XC^y-JbOO3h^E~D1vk-TfsbL&F zU(Ad$TI3xVdK1f@+e*2}W@nn!Lno7*bwT+9WRuYCrEex)gDDXyH<3m>^jmZN;$6>g zLlJ|$&Q6xXVG9_oF;+9n5_E%u{xV7KE#jHeCMQr?X=ws11vFBLx%uY$mLMC2cBA$4 zf5LQyL42yap+?i@l$Z24>Jr^jex*|(orEqt^Bh6=oOCS|N_ZvuVdmvtktN3et-^;R z$JXi>aDu_9m5_f)P{+(Rm@+r!V0fESgp^T=%q0ro&10nrYkgfi;B4>O2_RaN%fC2% zj>&lvw)cvqm(PXd^WL03k~fieerLolAsO1d(vx0W!YUl}(y~Ccs1sC0T9NqCoNjZW zbDFS$rxjW88e-S}V>y}`t)O^k%ZV%Wf-?8#)O=&QDkUi@o{6%A_R#s!_{)(MO%9nu zk8}t_iL&@=4$!#NTI~0e-y-pyXh&8pR|4({>lu;W%EZK`Zsh8c?IsrL2IuolFkkZ~ zrH;+bh~O~@me;1^3PquyBzb7m28jdsV<3bT9TrwLiL)%2Vd^e>!$gDdHIvVY+{<{z zPRV;%nWyBkqjiMAEAzr~ihP&)m4-SD^%H}qLkuIrcxx|gUnySWcRD$h1oc8C5bS%mckF$E0P3;B8IVN zC-_>9^M_Yeg`RZ37dc3BvxJ&!Z)-9|?a1fZd-oF9#_bf&n|UN69Yn;~pXypz2fqj# zk8>c?w4{DBcF$?c*~(d><6_M==aiw+cjqesSff>vfnMO}$nqg6{Y%Eu=S zq4gzeU#6!J#}3}A297;XzP<7kb$emA3HOzBjT@&_#g!{%B+?P$pt-M3Fr#O|4|%j#q>(EG zJ|`UmvUs@r$f_fY)^MW@4vZB@ppokoSL5IIQT)`xb9t5^4<&Mm@P$>IRf}cL4Ia*H zNyatPoJFjjq7{|=v8YNr2RTbUab74wp^JpY1Qx*t{wjo%1ngMW&n%+ZvoOn*4MaY( z-=}CA;Rr|+a#WfvAY^5|;*b-XKZ{YNlur2qG4$Nf|CEukg|m@@PA&ezMMo&Y{Q1D&lrOAcvL2( za>IO07bS8~i@4k(D|v&uebx^DHCs)JSru&q6^;07mBgS252psO+*n^|t7SA@^XbV| zy^n#ADiAMM#+34;RaL=YAi%uj=PA@65V;z%iFhvbc*vgI#A|7ootT;2^+GKrcC_^c zXTf)vH*U$rJR%_&z4p--alkIH_&klcNm_(Lgls*AbMDGbf`Xu_wPJ_ws3(olE+3~8 zWC|Y$-F2r|AvQE4$k{R*?sjGW=}RsWusC*6WQBmTP?0Q&xdIO-idm+-&RTW#{)grk zwl`7?=@S;S_*lbANkNPbsqV@P{%j|?0}quEl&^@M>t`X2rXmKv#GKyR2yWQTCbIM7 zQ1Hl4DH*{`c2~ZJE^FntAGl%b7`>=e`R83Y!nfy|SjQ#sd0ODPOQ1C3+tYljV5)jm z%XC*!yIVX>E2t;=c0V z{LS_y9{2U-7mS?;^^Y;GoO~1*^^ZI~d0Kd5W@zkl*`M=q>sHHFq_v_Pjn|NG=m@#` z{a`-Q{f}Kkc~0y6MZL)^!!NZpXiQlX6Y3MtlJ66QQ4rnSMNA3IfN)gkzc8l8^(I8jJ_*}>E<0D-Kq+i0zP zCu2%aGUhg##bi3dPJxYgPgpBQ?IlI%V9A*eiN^I7`J*f)cPar>pLQLu@UN)at^#Jv zw?eF8D8AE*@n@%NC60rRax6LPND1NXH--KMtanGgWW05?INBXNNO1n9ciqLBM~Oj0 zKQKKk$)RLBo05hJ?&#^G14)2>hs*v4zqwkz1v zo!mEJ9$YP0eD$s+E=O0dh~=t_-uj8^gKb@#iv-F-6#X;Xi9-FuxqdiY$-Ds}^HUS_ zN%d66seu7!|C_g$5W!ib624C!dW_T`6B7-@P$Lj&9#d7R8U6Y`hUPN5I?B1 zs1&G1^%@?Xwd+^G7OaK!erMqgZXSJ0a+cX>#dp(AmS%lsZd+NB@?F+`nhrJvnTLMWcFkE6HceBx--|C4Hw8kE!l1IV$C^Ec(;+V( zMf$M}_})qp`*M#ZKQVKH>(@);4si|FoWpYSadgCeKN}>5IQ{{gt+ySCB_()mxANkhP?+20ezfum3U6INJOUP1kT(_tH**lNjl5hm zD4r(x4Z;c={|Qde5uF`v44-NJf=XEk#gmS~$FS=LjU4?on>Qdfu?aoFl0LhB`QD>k zLCqsC+bQh#0P2rXxqFcP>6Mkt^)&ss3=MLu#50!6s(TQof?29L^c1NEetG^o;5h~& z$!SgD^V`lraLkqf$DT9!gy)M74J)CRJdfrHe+59x=zFw)>`bn<>hs|?kc+HkUS%|X zwvs9H+F8^wn*%Jy0G{Qe0f5zA6Y@YQZFD&onqj!@!``W7Wc%suveHk|My|{s0qF)|)1hqX4(Sf*P)gm1BHbuRmxL11A*HBDN{EPvGzbzF zsepod$Aa(koZtQX{&An@-t%}o&%w>wbImo^oMXJ>9q;@2`R{ZQ8xIz}YC3spwIJ;k zlLUFD)0oWNAHG2g$0#e#HittNQQMx_>9_E#^VN%t(jUZBYvi+%!g)ap?9t2}yTq-M z551?-&6G!`8a;dQL#D`NAH4BfQ2TIq;G}@G&|2&CdClJZ5zv!h6^CUB?RByiOOmP0 zYNn@K=J95ug!7)YecIHRKMPjI__i5UR8v2JW4Yz0Bfdu^RTDyblY1t?K_OJ=RT8`M z-U(TKN~^bal!Qq*X3RT8zj7FrE+}j=b z>mHcGnp<1Tq2mXrm0tj5rR(k25iwD622q?JF|kA3kB+e(jA2DEDz~*=agE4;Cg3Hy zO0`~M_vc5llJ@Y;J!0HQrtRfTk)WEZ5X3Za;miv86Qq%87w&qImQVUs91jv8Z}Izl 
z+2GgG!$14<#+R$_lA=k-0uXwn-mD)GpgV6Q3BI5E^IQ5A&c&GP+IOa;rR|tpnvG}6 z4Q-|;)*r8*_4CVEX(T$<1O(LC?Ush^0Crxx5Ep96ZDUNzrrf{)_37vL*#h`F@I#_; zgGy<3ua4AJYzD}Fu592NNNKkODG|QeDX$5^hBa`Y$?iJI%ZM21B;0$MPF91uyfy!x zy-Qw1`o`Mi!(qV&#AWubKZraubNm#jm!NyC ze4?f|Ps5j{McSJZ>ri{=){8%8%ywFd|Ky-EG~ES`H@6&16DUr-aoUMv#ew~5?^A<~ zHVt}gX>qgsAhEfeR*hZcN6OM)bI;nZrt7(|Cy1)Q52!uE(Qr$4nZatAvE53tS@s=+ z?U_Ja=QMgpE#P1dNLkDauo8P&O0cen7wczB5b>*dfp!EKaLzgRT>YU-q@ckPqElDzy`3tu$DqmJL@hsUz%fm62myyw56oe8FTY0 znndwLJ!#fz9H1Ow-0l3)gB|y}-bNd=yY-Wh(x1SK*KNCudi>b@tqYpOllm88wDZJ! z(I)1q@A(!DU)^icltRwC93uX<&9&DzjHA-tU0ZosN%gF-GpsT>8!Ta$V6c1ZFWbq+)a$ zVjW7c!EDLSx6b^gR0vRMSh&x*kuq-(RMw*975bQ0?D!t;*IuV66xqh?D^ms=|Cf?2 zEW#^5dI+Dwvq(3<=bc#Sjz=_VN0O}{W<@jdgP@(vQmRo)_mrues*7|}JOhaU7ai)| zeZlHul!Ui&N~KSegN(z#d$-P}?5#(HHMq&6gP!CyGqdbH)0h@1#<%)GpWfBHTKWT9 zoYhoIsE4cbNJ{hsp4L4`J0mn zGyoyg1DB$@dsc>O?GD4kX&%!V^y_=m)PbD6cnMVX0S6(dB#xmWnMKl6>h(&<;K~FS z=m^`}v<%BI!O0^Ll^wM0W`XM9Y2U}EIu#f?r|{)QcVV#FL(12xEh`|Dkf{-g<1Mlc zYT9q4Z2hpSO5viq)m=cHW(aCJsky?WFTqPryH1ieb^lmCY8nSG|GSf2?v&pa`_R)C zKPFt;DN)*ZwOJIT(hNQ-F>)mjtf%674lQXcvE5GA61(Tj@Fz)5aR_+8*~)iPM{B%j z+SCe@y9sKRQ<-zm_ZQE4GRFM5tjZ9W%7*f$e(oSd!M*G^Dey7jm6GB6%5N7tN@LFr zoZq0O;)n{k5Roi>8!{(*RPK`|`=A6TbrlaPxxUYI7gdz@#Db|U?K!tvaO%8X<}2nG z1nVq*cwcqoPPezm}B0NR}=+utLe7JE-&=@wOUj zNWbxzF;&G{U_Had$#MClVEonglAv?Eloqc9E>$E<=#?>6FxcW>Y*F!?Am8W3UGEFV zReBKfydyi!0KXzZU*&bsFUnQf`?Qk&#OUjJ@-2ehJm(3g>S=_0TD%DN*k%;&ij*Qm ziv#L;OPfGfCEDJ|^ zh*g~S>oCGpaUe+Mt3Sh_NqAZA9h_&16ZnE75I4oWjl=F4Q(w=x(UM5j_8p=F@q-*3 z8pm;>&<-+v9|GE~wOjTb!_KT~E{*I$q|eyLgw`5#RyzXDwY}!~!o>a}kG?8BX-HsO zcRqwIE*?wo(PN=$H*DT^rXh_->^R%0n(peJDe=em{0LaDeP*|>fYOZ&1qj^PAgsPn zYWsk~cUoVo^6P|jc9#-0H!i;Q!=e~|ve+(_%OWy~xV2TH#%JG=_HmNcwTmy>DV>Rs zc9`*v)foxDs5dh)!P*MCW85+vW@j6$st1FA|l^JNO1Q$@I_>X8fF+bLr zy-!0)n1-{==tQ^pF-F~Cq|#uw7-B_o7|L2~lKQ4X+{V*H6~2_sQ-K?O^@RRI4z=@_ z(to7A)DC%bHZpuLhqKIC$RxNN$S@78qspVQiWL;6PO&}_5>#g`9*7mGG`L_$M`F=Y zpmZs6wpF0*;~^If=}5)Bdm?-IxuU-;*$uRifmiw~)AE)h&<4zE585pDq+^8nJ=}cV z+90&OM^PHGYQ7M5;n) zhdkN2az?FR`DS-4&N8a8HNYEL4WafM*T>@Y>Re|a_sQCJn(e`5bf&O|HyJ7#&l_^5QC|f|z z+$i{FlRX(BZsmzrdC(%wX z5Y?`gA|O8-4QeR<`j+B-4T@vn88Vy*bX4Hp0*7@54l3QBz^J`#w*`}2G6VsN_F+BM>iX0?-anvLwfRGonk8yUep)wFJ+LJK~5?AcpNy8Vt0~P zpkPt_QYZile$YQ*D$p7Urko|Xh=4L8(ggpFTGE-In!5iC!3GSt^z zfJQjn`U;yW5FERILzF|3Rh`XDon8KknS#njPN|v$GCohp3%J=MhURzqFY5aHUYUt;ll!TvwFwh7}LylD;bpwl&?RP+>SW`gvs z;X9B`>myTzRfI-x0uW=eSKwC0@zJt_=htDGDuPIb3U3bF0U(J?6@7~rpo z*NvtBATjWmik2o6I_Tn}%mN=}LzLSKl%@Mq<=?yrH>{sR&trYx1BE`6%va@u0EtH4 z15tHu)HCsdS6g0BzK;pnyVjfNs0$r{L*judsRhSr?ihiG^!TSVkR)Ez;{B*H(ffU6 z+2UN@Fx+tSD#%2(VGUWFp}9}EtyN2$>RH_ybvj|P1fC?p<~JY+PY7Jp74r5f&ch1( z23TI}m%-5ic_OKO%8XPcHU6n3z%)Y{BYv)OxF*IOtEO0%AB0PpQ5+b#E|Dy zVV-8*{(9M_(OG5W`#U>D@Hbbgi78O}W^e9Dy`~gE9!8Y0fK=Uqb54+E|DgW-T8-|_ z%b?)%h{npi);?6YAb*+%MJMYkWZ4f?hHE=tVtq;crDONR+=!enynQLJ#c`{ zh4)a(Z_qLFlNZa5gyRoOBGb&e6k}oO1@tl?D&;vQ0Y?P zCu^7Hq{2uMauV96uAG0E{aR2bm9BukJtPC;PKaEOUw*mO*M>oRLQj=>l+fOpQabUq z+l*%+KMuQhOU@&lOsb$VCpXVWq#>~|g~F$DYeC8UhSD#TAA3)-1rlUAM`&zsPWp~{X*5oUL+ediT3WNcl$Gk!4qW@k& zoD8(5$kO}}rqOVrG>9bNxGwO{bit%cTjQZTwS=fMU)@*I&zvvl2k17fgv56!@>wdn zMr%L<{aiS~moN@Oo)0v2rldAbR6+~Gj72ei3+o8i9U2U?b4I3Ebu36V?&qb)@B?IF zL5S9IQ;B`Qc>p!x9WX#`LO;X5>1%=3RgR*w)GN9}#D6bxqPdHy;BZ5NJ&8gX+E;M) zczb*HbXPVB^HRni;v|se>tpF^!p`$?1O~dkCu*?Kh+R^&CFS<;>*nAtMhNuH6y39{+v)KZ9X8|=&dBypu5xvDqUmHR=AuP z*6)5*X8Gx_9V>}M&EiNBda79G_uJ;R<6Te3K84nHP2WHIqc2|nw2@UMRb%+;=s0&5_>**R!x`k_DuFW!FzGb-py=_R~Xd+o-l z%IFFXC0ehH!E&4=sUt?r9xp{-n+2(Oja75?pPmeVvTW&rmDi{LJ0Jr z0TI~aXyM%XTwZC>N@Wy$xZoj}qlWfwLJ>dQ1)Y9M#lwG${h$A+ 
zp)~)4Fz4kB8{hp@PBK`C6881F>4W|0sbBz&1V1$NxM#Vy;EjOU5gHp+X(7a8$4iQs?^w`MmfJt(2`KyDZWpM}b8hz%52T;w zdIDMQ?Cq!jH986-JOI99-3Q`IE$U%9P5qdDqRJNjtr;NgWY<}d0pXZfABB95c-hhw zj%sy->{(0OwJ@Z=AE%{`;X?g}KqM0!WV#cLZheE_9%<La2Q9-gR8A&omsi#FcB!M znb1M*2|%6lB|YhGa(=YCj&dPe0{O&(Y!x&5%GMfvD=Ur4Cm3R8IyODwiShBOR&0S?3UNC@1zG4`2 z-sOkaqqulX7Ddubv(AYC_p9P}-e3WQhvEX(J)G?ERakyR+DG$FA%{D0Ls`9&C6y01 zit#Mq@7$BEc$c+d@vOqvd%gJ2*E_M*LOlH&4n{__n^7bTML;E0Ryh?dNOjWBg?Uoo zuuw!n5UO!;FZ=H3pF(KW?-SL^T59+R%iLKBb7c-KJ6t^TQsep@;Mm%rA*yxKYy8O@ zSW^EPPN>Ep&fgh<2nJCb-P`A1I`aae2K;tS)@u0wYCyy(& zsJ(iZzJoCCMURx3cZI`Bn^Z9$B-BokGD(zS3suLrKoPaZ_6bv6Bea17^$6Lw54gvK zJ9ub6R=?wabY)vZaehXo`!YBT14#R#SuNObgC_;ye&2n^A>LWYo ze!KHgFVfY8|8kSlt!%mT9Vm&y@ci(px(mGk_O~)^oRsnyeNpAXu8n6XN5)yl?%Jn1 zaWqMIToip|c5hOC%7#Js4#!^>m9CkQqXb_A?AMN{oHs&S^nguHZfG@nF=5gE+ufRKh<_wO4K{1T7W@uh~d+e zU7AcJjL@`)#y?fQh}LMTnth0^xwzu>)I30Y~7VUNqP?;8-FEAr^yZG&`9 z49#Jtg%l^ibp!~6S)S*XPBY-zs+m<=XZc-iJB6H7{80LB`!=EtINK`F|4OE_b6emQ z2GQbTD1+tN^jW2Wml7LZu`bT-_n<-_1dPT!Xap~wd9!hizIc$hKMJuW%Y}A74Q}0! zl(Vr^4liE_oy_YUcD%JE*2~-gtSHN%Es8PO%TB-=gw8lE-Yvg!tt{1oL81?5mLCkZ zs?LI)WA7St$|tsQt4`v`lEr^(Pcv-z3`y6Yd^_j@T-|{Lr z0#bCPh%V7vj4`LqNDs{L=MYAmW4rT)!LAodn@nm~`%8xqDhvG#M3(9L8R1{uVJpF0 z+XD83BD7)|Uxs=bas*0I&t%vI_Z3S*l1iXHzvi$^UQ*vUlROTABIu3!jEGuF7Lp;B z$~#n1OGa;j4wJK_qd}DB)@YBK1qUK1F`*?-EXIoZA}v29aj+$9L-@0B!bL{fpD#CH zL~{-0msXm8yR4!f~w>O94}X64TVw8yNTt$CTj>)RB?>HK|rnk z5MxX>9IVQwcSW4J0zgiG3lLQjB&BB9u@`&gOJY`{{3NgVMP)xwya!hACtU;z^ zlB}0kR4ZK|W06aZeke}_G+jMP)EI0rNDCzJLT6MiwpLljoW!lvUFAA^{B^7U3Merg z_E7fTDo!3WovyJ{?2yu2(aBW28NBta6zZpoc~@s!!O{n6D;&bxyk>iK&gps>6TFaA8%IQzdM^Jv4hnu@?zYywCXQALI=5D_*2`8QmR-NTC&1(ha zDAMO>F(4OP>pAuGcb7nV_t?!XNb154VzV$lTJNmwSP@7DIZ8@AO5m$e;p(+$2R|fg z;nA!#VyIi~Mx-ksY4$_OikZ)Vzgw->dwCyLHOIDLbLhnS1ai$TwI6PA&hM%9f~i|_ zn}ds@<5oE4n86U1$kUIGat*=f4_TQ9;n zkvz4ej_;PGqlQ4Nkba&V?dM3~dV#{Pdr&7sHoRIU{Z#u@ipX)t z-9$~h(F6{TkRE~1Q|sH@!z7oVD~6VdJmIUdaQMb}k7lm5(&lkfv!7|5e(gt~cXnnk z%|vM8^K$aTQ)RjN$Uz6XB2zYHKdfgGf&kdeVsQtXkW(qMcSDrNRYXUN?aEw- zgh>!(Xba`Wx_GSB?a{jn92BqzT$m@?e!JD;9Cv)(!9FedPZJU4@{E(1)qT-;mYae0 zLF(y-(9Wa!keM2f^y(f!0HX51&w&m!SmKq1+C1+Z_U(agKo}-fuT#_( zz42@fI$~?uw&LzRbL=Koh|(dp&k=ROO3UW6;hH;b&~aV4yCQMSCRn&D8i$UDi^f0E zZ+>t$I)A6pBqlASY(->>?u|BaG;I)2^+jJM{*^k-nH#GivD>*nm}!)1 z;ivacfDGa^c!vFzgTe#B6*q|I;a$msn5G!ZvB1#=c0~*;ulU3X$m@q2JCC?OMUVRe zmzFtT`Z{)3)Vl5f2nP!$PH7r}9H*YF*)zrB$%CVm>rifAsGgg6x~NN_kD>(Qn;w<@ zv{=zEPl=w4l^+3{45*?2CW5!8_T%=T|5DrdCGa@)+U3QDkZxH{;)!FOFBx|9Wljuq zEqwd<=6F_QBRBc5|EO*d)?hE-3m zieK_JKT8b?M)*y6>b~0Z`;zj^|W<#0<(&1&Rmj)yh@rd6v^Xsb0e5nI}#YX zrp(`GX}SuwiPV1yQj)4BSvFEdqs#AsxDJt3qX1qY#r%EZj{z_5i3xDv1rw@VMmNmy zR${o3e*S`Q8G}0fPaTmUJ-L9pkRoru^ZueTnf{WBasGu_Va}?|A9;kk9d4Cj zmo1K3g(;y416Bq{$Ndr{<^=>UVx!R$Nm@3PG3WvEGikJ;N)&`sYgRF5-A*0ibm^!f z($IIE_k{FYY4Ui=`BlsB97P-|1uuwH_#3uZ6`i+r=wt&WpP`6W2Zf%8mx~-GEab>H z9b{J}pZ?)%^k9LT?eV2eR{2JV>fp5Hld2fh$GQBBknn@|u5j4X2eF4=Hi9ZjwO~>- z87WPd<2{Tu+3=S1*tD{^%`{r`Z+tpLfe|2BB9zbBR5RRUMTJciuYR8(>OAeC=Yj(; zgJ{#6cX;qxsgq`;uet~v>Q&XT!QL>^$w^ll!gM5Hg{1o@;3DZg`jP%RCKZS45g|wl z5yDM5;$jjhLdTIy>_lsj9cn&|LNt8d->{spD>~%M(@j}Ak67fFKB&O(iZ_j9Sp=Mq zjM-o|`9$F6g<6&3K3f}l9!2mlX@1BK)&HQdUkJ}x8$#v!Gv7V|dXK3C#8+U4vur2h zEJW(se;6vFVK%t6kPAnv$KEyml>YDV+UJgDsM!Bd(-3(Q1%NX0H)t_bJmSH_rGgP- zGRr|P4TB;^Bu|naHlC(@fsFWvfe|C}q+@W8Zo7LPWkCJUqr{*(kg?qn`c7)&HeEs?3wGLMX1o2y_R5?>!Gjn=nh*>?U%o77;TLCb)PbtjRHD@6 z3vLmsCd9l5RHDY9!v=%}roMlC)9d|ds`(NGeNyB0_#?1`_d^&6?no!U?;y}Qqj*## zL}f`oK#A(hM8gUqg9Uy60bj;9A%E)zx@BzfUHaKj|60x(iiGPIRJ83ps~RDKE5&#iaWAROLqPf!KKB6L@$8g)%71s83>5?^ZRH0Owsoca(RHb7|g~#;HGfLpP?z) 
z{yR{`5W_mDnSeK75>o|iBcw_Kk}NYI(45Nr{UHeD!J*ayQTPN&KjOCt^rHvoi~FIK z2hgJ_P!|JWEoJcHZOa&Pm}0uc(r*VEfdW1+@9Uhd?gA|q`2m6qtD6m7MPARTFnZvn z=c-UmRelEI%v$OS>o<_ym{yp6UA=ev3Oe4P3gEm@ z^9l$}QVJoegqi@V2(;9eb1ngr)dX%KEFWolQzdWyK0q3_>Wy0fbhG3JMEe^gE}L+m zUxY5L4_(GRBqyaBAUlYD3qB2VzP&8;HUX3aKf9CQR90?Pa>Oc?X(Ck!q^V7ONQSfU zDtJbAdWCdyv!BU%n@a0qWDuwo+YWLtF>AiTorvfZ6r>Rd=QHOHua0#|9khI@+wVih z9kavH3H+ojr~%#i1D)>(>V6TzMpy(>v;ibuGr-6S22GIN-#=dF$WLz>%urhNb zWMI;KR<7OG}Q zTOUL7H2M3bj)p5!-qJPY+9K7dZm?mh4%h|JhsF_oaNS)Rnr-!WU7N}Dh$3Y?zj-lE zpcfW(r?@*J*#f9N!}!}`ZF=dSd2VU29Nz)E4{%us<)4r>Az?Wp%mV3nR;HtpRb`Ev zf!d-`x{1y)c!MS(If3fa1y&dHa^~A#1OrqbLeHKLWl^rrF+aKYHxPE;lbPNyRRWF$ z$RxfZd_f5KR@$5?ZZCa&eed|JYcLXFBXbJt9#(E(d;{`+A)!C1`?ue=H@=$;sL7Zq z1y}XNVN&ZXewJ=_whf=^xT_5`NKcW7tHtN)`%TG;I z_TlG>s=$(IMojlUR8T%7##!Mj+r(Vt{}5@j_uKIaFGE^fUUjMer^>Flr{1;5HwP?(JFgU3ME7;sR z(vGlBh{%vo($75EBBlYDP!=y+Y+@3g|LG7&vlpE=?A@jc>O`oJ2B)J!mW&-95ePnc zNMx_$G57RLa3>VkxO3`Zo=X8s)D#d@WbQE z4<$Q$?xUbg_xoqOE3FNWeF`s~c$esWWN0ts&lhXuwal#E?+;*4p^6{8f|zj=g4@Rs zu+NZ**vND0+qGfE#5i8yUHm!SZxbYA71O>nUT3c;_3P5+x;yQKr(cR}t#4B(Cj^%o zU7^~Fk%K_dTTtBtGz0>I&l6m!i;ZTy&0XXGg*$xuOr2nbD3^Z6EPE%sEMY9k{wef{ zT{5&_DX|SnmN-#)WlwIEG?TrS)o*&_o#EgUS;Pec^doN<13+J_Qp zpvz6<3q~=XRxIe5ZYN{DKzpfS+WW;|t1j8JkIPVQ?A!F_u$w>?8h6ezx*J4UgL_*6 zGA{D7F}Okw>XwPT$a$g=>vZw$E%~*W2W;_`-fhYVE)i5rM1zc?1R$gNVc zVz`Wz&cF*n<{*(!*8B~IiUoJCHa!aEa1B3dJr}#e=3v%{VK#2dK{OTzhgDir<(x4W zkE!?k_n^n^^{J+KNu(7r#C6xVS??;ilbrVA6<(!^&lmC4LbF=~y;7h?4j=iII~pW`OG5haTegc~xiF5D?>4`MlglX5k4)y8wP z5qCJq&Oc|NHH4=}AY3_XO8SIz&#-KTzQV~|&kBPl0dX;x%_zjWDvniH?;rS7VmTu% zdIaXtkB`;#CaHX(K)vQHA6`Sb6gFcimW>N5EJ!$#bV82fstLU@_gYGu686vD4F3Gq zysuk0GUZv!n(D`Eh8I5?S~euHSv)NDwE;}%E6 zG{^jSa<>PU8I>_Hos4Z-?$^Xp5;Y-#UfHk?&RaS0xo@E&k2gSfp>nS+{A;#4nkoJk zXDM3YPS$YbJIRY=%#z}sR-0wNqeku4ZAjNoN|9gmCUuQ3&-w9{zJZ&URq=#~$CM!R zc7^EKVwqiRs6zldeP!b(D55u;OXv+jo_vy1g16P%veq8AnoiHAgk|druCOW}>6noA z2LdH8vSmTXMBm|!{ZAD&qGZTE;3s@o+2W)qmK>1m_=89FT!PNTm%fKT>gw%X8pKOYjupY>-JOvMws%Oj3NzvHq)v8c0eZ@c@8l|n4< zx#u^5I?pH7<2+7V)&Go@w|u|pLLmRJdC2f zAn+u*9_N?VtZK-WxexvEVv&MOf}B0PA84d3b>8tA2Kl#p#IKWkcgfy9O@J>|*E6Q> zHPVI_i=g=0?ISuX*LFj_+F=)cE=piYH9iIp?PSEh>@hiCrF*F5XOENatMQ3(BHb8cq?RCf%J0 zKaJvsFxQfAaI=5UAeeve+(s^?5#YMqz`Z{v;2MG=%v@Zhz{D=ahA4T}b+KgY=RT<& zyp4mSV?E@dEoT;bV<2}RYhryGOYh0hKp}QFRXeT!$FhJlV!=Tkd_xrtzS+@<$3K4OHgOt9CJPydg{f$Wg;I=!PNY>@cG0b` zWgf>`bL$(Zu!hAmDz1|Wby{b55e}9l<1Q1`JWJ^1x{@eH4K>D;9^#FpW9GCxJ&?T? 
zM$5R%iZlN1kWDa}dQ2jeY@4_=V_d97+cOemG>vw#uOJ}nUia}8!I0IGo zfMpIDEne&)CUvn$TtMr)`sxAUSX|@JYJcm~paj|R3 z?AjC3cPz>wfm3r-$0{r)VEia|lGkSB_C3TusOWArMne+ENo+wt*h!EzN8+r>Ma(J4 zXy=~q9!`ED19h87@&aqSn>twnK}gON{a4qEXL8+KMmo8*?3D5--hP$qRKou19*S;T z-U!^i)p&Zw&FE57Qab*_{ix9nJhOO*Sjw&l_AR zGr87KvO00famVzUTFWBSZEWzy^+p_$ccM@4K3BmvuND13k*m^jt-dB&m%(q^hs#WA z8*hYo-Mo5>(28=6bZtK~!$)Q-!~7J_Rr~3blM|^9&-R4wtTgr~%{?YEV=cLZk|+FxNmr~W|}b;HNgc~u*aUbJNd zShb4Oj&jJz{3(_lF+6`w4<++JNzTUKBMKcKaud%mZ}5w>R;j~$HHDXG0+-g9IWxP| zQh6PpYJgca)4&kH$&xM>;>7M2#Y!!mg|x(;tw3~P8KH=njjD+TM{|M_ha6e27AsSH zxs2Jc>x8FJK1*Lc7QE-`ar=4xLFX8eWsH`nD3dPS-rEs%7==nKLjx-_K*oeZVOQcG z8F^ffKphO`&^k06A&nHF=VM+vjxgmq*U6=iSI0Ox-cim?Lw!QyD`}mys+_Fs=}He- zei`?*(6AGMr`W`i*n3t_8P6KMt7UaTsbKn+aqD+!p3O3?f$w$o_ z$8KKeuX+|pF)+23)BDsI{1TAw#vqzQ6eZ#MU+Wz8VxGNF^JdERsgK9~j>{ zu08P=FpG;tu&wGOM>(U>qo#9&|HO;{bbemt^-!<}fUa4GyzP-NP#_NgQH=Y;P<|W~ zfv?P+SM2zd571g4a^a8f4eLwO{--90(9dn2PwA;#cXX#bV%* zjJWfeX~^F7{dCms4oEQ>vU(;krs}Hg^awSBB%c_GhA;teCq)-(&ctJ`0J8b=R*K&t zfcynn&)+s;W*7Xk@$uG||=qgV{`~ z)!8Te{b_Pz6tfvDPo_NgYql83K>KV^WY zArDzVL7^W9*C|~$3~4#!%zJ$XX^KpK0K5f_7)TLnpfvg$x?&yK@7t7F9{zIZ9bOo| zKw!%!do~n2MAz?#0<;j&IHbUTfc*Y)K-x3_RgmMp1QKzwh;rzg&W3%u7S72$WX+|X z45do808n{-s(C$uQM}x=3SmpH8v=z_aUHN+Fe^T7`7ow;8{rB7KgT>OMNjSlwA*=g zT-=1!SX>7v03hHDe$s){lmm>N(GMgUeML7o}~Gh^g|0TkS}(Ln;^(|iCJ+FQko zZz}FUqc-( z82Ax)avlO0SJ<7$tUpxn?<0Q^g-IO~vStr$D?z#x)u}#tK*9OmtHhJiSHy-iilJ7T zp?mz)bwr3qvoTY#EilaCV|A1FHK`Xu zq3ZswQ2Ds&_jn>)olJz^A#UrK*Ab&SwXy3E4rGuegk=4R&dMv)nS^|v;G~zmFmDe@ zV)t`E$4~Z8QGt9hvTtKhc(Ar41Jj$`hT#y=P0 zPh+{P2)I&(H|R4Kpz**0yO=1AC{U*>{$7cejo4sgju9!PTfodi1Ax5-zwi%0(8ZiL z8*pQpeXmCXdtX_?pW-UiJN_-DdqO^a{bZ@RgsB14MVKWU4}-maKqP_ifmvkds3i7G zZJ_BZJM9DV#&?O|aBP~Rh;^*xSdV_h##=j3fKJoRt+ z+yih_b=RE14jT<2mJ?m?JEQ$ES(}LA`W|l8bEu$C0!5r?Gzbd1V&~On^<5;+A4iIt`a^Cak{M8QA*mqx)SnZINctdrHBimD zEq$Y^QS15E%2&g>)8tGNtB84ndkd5h95SG6UW$NqpJ{Nd_=Lah0!SQ1f(7l~gv8oc zQUK1ck%nUd>_l*OARqWu#^;4ko<58oD6H@dwW2Y24IB7Kg*=mw*^z+gDIgY2aSfM% z9RN$OYD()sKKwP6j`hU~LDf`pHOi>ciJF^#en$jyJH7FUKxGaQ6LnkPBNdgHJ4GPi zNsg~4JbQxZR?;VcjH!MAI{UCHrV7B%lSbSQfuJtt6Jax4eRLRU-9=70z7{}>#$bP-DDSQS@#s&>9k0XgXhY$m)u(FM7P^|N90hpVC1{8Qk2!doya@@SPq;kH6s zG$|t**fYT7QE+zQR#So+`3a7vHae!v7dciKuo}7Y&(|5M&0VJX4&@X<8U1otty*Xv zXm2I3FAc`t|0VlXg5vQdy&z+kqkR!Wm>M?7(GeBMXrKh&>jqUd>`!#t-0Fq|i=+q5 z*okGeNr20Rx3{7{*QXFmCI6N4!#mhlL1sb^AGslvN$$3^}2ueDAK|+YFm4 zjMWZNWJUL1cSW>|592m0fsSBUM=8y4ru98707+YQQ|R z_wHGM3*A8Sd3j_lf%quARDoU3@VW03E_BR~*&b~|@OOVSXgx0ZSH6AY$jw&jP{%6U zXa&FU1->rr7e5AS&JvzJb6m!e}{W2A{v&g+{78ax&s?QolrVy?0ib*%kB6hDC}RY1pSU0>r7t$eN=EA zbVVikGr6OK1<7_;;n{7Bmmh7Z$eU5m16sRd(RPT_+|fFP9l*&H>t*t9t`LKEH=#nRLX> z@uR{&{||l!I0!7#Fq5?Z{30mr4E_BD$jBq%t>QB`IYb4Ic0{-Y^Z&oZ|HX6RLud@< nn7MKP;laVOJ&nsX_6J{$SfIc?WG@y6{-dd?r&6tCANIchX`=41 literal 56296 zcmdS>WmuKl_dN~+B8^hgvFYyaF6k1GZcsWTq`SKtkq`kXX#r`ZLrRd8mJ*TXzczTz z=Ucz)dfq2~$|Rx;hH5vbwvwv$%7zI5=Cevh(xvv$Aop za&Rz%E0|q8?OjbgnC)FC?{D(YeWWa0%$#i;U2PofA<+Apm^!$*3R6%(Kj`0o?#JnB zWBH#?vUmCWSl|I!p?|~5&cep}@4dlWg`lqrNIKX#I$OB7fcuMZ3*BA$-(LIA=iFbf zYVF|a0A|72#!TMc)xsIP+0_I(IuVY)@BV*Z@jvgS;%s99p8D?H?0?_=Utjxsy$~z( z!T-xd+|Tmet6-W%P=r|jT{01rsx^*97#J}ac`0#C57@mN&sL?cx3_+I>I97H#a}ed zX*4LLdxy-_>E!9CEW7IixaGsgyXhaOmG^%4L*9VxjnmT<7LZZ`QV5R*yQ4 z3SWMA84=lZyJ?&@d|mNz#;bC7Z^oy6>1BoA)$bXjTNN@)4A{Rva>8&{Z@Z zO}^c?db_1I;p5{BwM1rbzl-gXA!}W`NHHq#7K(zm9AE$g@-QS-9!INOeW$%d4p(br zw`V^amL`}8>N?Jj*7{;e1}mO0KC@kDWVg;>%OLZ+Vpq9c2_x%Fj@W-Tn94l!!HCRY z5$zqf=kdBk2#SHnN{HyqnOT$b4^pqK0yLXvG2vqC&`+}}N`!uz%QA z9YM?COt)`#4M)ka4-gcyc$h!MwJZms&ez!_QOd@iJVP_OnN$%Kc3c*V!e#En8*FX4 
zy_jhaMI&(jHOzZ3S4*psEA)GJhE=cLc0*X1IZ8|wPB(CaLE~Vm=ZDF++O`65qu?dxmPRsq<`yA&cnc9;ZtLV%hVLFWPS|7}Gc{hdMSB znf2-ynp_&}zMXF7`O#owqwX^p_D&J<+GoED`B?1rdxzg;gGud8_ttn}PMC_{`H!?2 z!`F7FdvmPqSI1U)KGVr1Fkf5Tb}d;R_>E-q*Sc(u1TU-Zz5mdb$?HHGp_Ik*>2kki z7R>Kj9I3Du4y$2yB;Vrxr$67LSljSz)EgaFOu9ldrs)(@d@fI&_#Kx!#Dfr+pVw`b zy{{Dbv*7Zf?czmAn%(|Flk3gZuTh8DsujV5R-BBB)f)XF?C`K@cQ zg1AgNs$e01jR<~t^>g0g)9+`ceK|s&#{r0>je9kN@q+nm55$8BY-ZnUf7lq#(rAzj zdz3_@L^0mrV8n;5YC-7zBSjm3YPv#ixX3qBtQYL#FFLs|l;EWxui#aAtn;ZAQqyJI zz-F+{|Fh(`q1Itw z;J<8u^V|M=S}%b~=pp8_w{nu@I4M+6Vnd;Gf4d4(>v z3T3_!3%-h$h0xIby8g37`St?rxOW<~u2|E_U{=@_?Uix_QigUvwC9ds+f9~fF`}uh zg9#l9LB%ul+Ah*I^rYUV6A^s1C-%VY;5!L@#H)c6M!XSjY>Ovnjm1h?aIn-|u3M_1 z(Px5QCv;`~{g90ikx9g2tONXrT;bPP)!q2=2TSd@`z;5Ry0tWAT#>$Ikw|Zq{6#I! zF#OV9sii=JQ9vUW+miN3UaRSOI`Q{pIwJVsrgf6Im0+%MTCFlo_KYj3&zhyGpY>x2 zI5*xzv2fVz$;_%0W*9trJ<5BXwdSCM95=o+L@+` z*`%tPzxCL&xhb2|>XA_^o&Q>2TvE^%afTRt_Dg_t9}GK$tLy+*(d&YlTuh-RA~dM#Cj7{? z>P=WNReeLN=Py%*wd1h@Nv=Q?t_CDD3HMB{=h%Gxd}z`ZUy{$kN+`AcqILUS4d3x2 zxX^VS{)ULioIxl5>0QiaMKad?T-WvTtUrM=`~eeouRvh&XT9!k$!dqDJZ9uBV9O|^ zu?Vk4LqZ+xSHidFKfWj7D;TC#&aO(E)m9{2L-b{D8M;hVV5zpyN8VF0Ay)A|T3zoP zlwl?|>GhK(Gzf`^Iz?u1=I*C{QK(aGN<)^1gN()|=)S*B`>BS9!QMoby^?w}h%RpY zN6=#t5EbzhDM#|SR`mtK?7OPgzcj5zGM1{>S*vw=DH?c~N_RcCOpf{GgcdwYllj98 zg;Hf)rV%b@)U7C{FDAtHud(#)t6yN+!pX)`%*^Alaeg?R9!;`QnUIBH?t-Y)K`2{%EmcE- zMoU=CVTmzk#mGPoN1zN6UROeyIM}wcm~=W-kVq3S>U81Crj1M{-I~Mso2|mQcnBTU zFWTc6-tYapb7v)0j6%o#l-(cUUyaj|zVQr?<8hbA9#z0mFkX>!4u`W}Z04@4WDAMF z<#4ZtIhyWk?2(D`_*3=4cs}Av<;wf~czh(4S?_R{pG+1bagLy0B)r8nddtCzJqy7y zKZTkt?6|SPmKjTD?QCF8vDk;0#LQ5iadAZGm!j2T2iJ344cyfT9SSCIsaV8RwOAx` zqyR|j3Yv{}40OYZ*&;zm;>F>|4x4{OTuC7$NQ$H}MVJoE`utgx?XNwm!0(bbUJ4f{ zpOn!_vdkiz7JK+R|KT(D9~kvQuTO1DNP4t7+~o6+Lud>U^-xu)2oWw*-z%moDAHJ? znqy!}Zv<%}2R+SShQayXAl9Om-W&AY^lR3~YgK;?GFK{-naa*|m$sXWmpsB3M(r#V zPjkX;wAp8uin1;1COUFuz#_BtN~wym^qB3pO&qk>ErY1ov>RH*8si*waB@gZpz2 zX$Y980XjfS7L6d<>e4aZB)~!avZYsAOfhPefj>>yF+nppsq;IOXk05thw(zJioXWI zq;ErPvP9)EYo~S6tmB&=A}u=BB0`K=Nt@H&fy zp@6|l+~(m3bTn)6j#lJBrzp*E;8&#dcy}0nWJ>crL$8#Y!PG~b%TXQ&vp{IWK-mP& zO}9h3O_x5@gThXvyKVU+xgG9%!L*@){XEOW7iR=O2PFb80D~vT! 
zKlRvH2#K?PI!Sz4q*EvpV_V_Sex2%6@V@_ZdUqR4?`nv@@!a|LWXXWL2$D@mQf$$7 zeaIG8IgVNX>?|7NI!S6tM_)#7HBt2ersP_evNHaFqVHIWDGaT1c<;DyF@9MxyTHtt z^vvdZWl@^P)UV4t>Q~Ea4U<%IyOdB2DK7&vGyDD1>$$ekQh37-4@Wkfha@zxQ}|(Y z#ARY*&np!l!7*x;C8H-eEuyJT+g8=mKQBdG&5&R=EOg>AW2O4EJwGmP0syeKYm?AEX^n3SNfG?16^X#+`U#}X=Te?Js50xoG)^f!;k^D1Nfi|CzbASLN( z$npQxCfe;>6w1I&ye4J0bkz!0kMta99*Wm0S1LwyJeT!~7a zWp!PBOaz?4gk}PF|DdL2C7P+d@9nkwMHuPvgu>S+ceDYC%Ku_TkOx3v-5T@#G|%P2 zlq5dr*oLGx1bU!wuTVD5Ydde}ep!OaJKUL;>P%3!o=srKYYYwISXH9cOhZ@=G!5=1 zI6xJ?3gJkJXs7a6BLF%|OMVv%46G;ZAJcCBj$aHP&b#keiKy7$o>~}i9#)O)CXRlE zQBCc8mIH8}5}b2nQ3RbLSWz}dSB1Z?W98i4P>!P?Rrv4lF#`Co5Wh=E}WQ3th-X&5YFi+*4F+@a?=V&s#gy4uu^|NOA-Ovjp&jl^J~IGdfS+gOI!;?-1-38S+;*qSwBGyv z{;CZyK4rDdBn>BiwB*UTz?cd; zO4SPY8`omNwMrQrMc`4dFaMBWIT{a4RX%qm0unpi#blc1P*QI#qSHi>|X==(GSTI z1=G`Nsb8tzR0G(A%mXLgR*xfaFuvnzNZdmm=;|N3(N%-J=}TzY&EAI#S=&0cw%?1&WyGou^6MFQ)+~;u|#Mbv9(UyTuU&hy{4Z z&XLgFDgqHZgeMjPrMxfu(Hv1j^oOj(`v-~=6utkh}ENk(i zVx@N*Mhu909jgHgvdB-G?$GdvmqA z_)CpWW=yi>*|Pg+1RTLvfCGOL&UGlCKMfhL{VhWWYtS$&PRk%a1 zp)*41&m49gfOLYq_=1~9Q!q<;F1kk)L| z{~k@j?=;|G{J+8qbi$!?ZUt8Zq6EzUj1u5P{f$5Wxe%=I|Gp4x!N1|;Z;}(szKc8m z>yH14JO8;5oCXUx5ST&u`kTtaIiYj+zmo{aHpp_xoIN9aYti704GzlP$F#%F;GH9{!xO_%UJKH|W-3#kF-6#i%zzeYtz0ffaG;9`lP?m`B0_R!`k z$J9E1$?tY?5dZJ(5@D8Nyd`OVjB%BsQ9<{0`cazrRg=rfmKv*>>fI>ip2B(a9;0U@ zOHSr<#6!~^^cv_aib6k5&J@fj_0hWJ^4L1gFF4K{j_`9qpNqOQ_R_mtM@0?WM)ZU_ zCz&&=Iw+C5A9eI>5W@ON`=1$*bAqnO`NRL8Ql>eWmir5R#FoRze2u@Q0CW|0uzUPt zkiOpU_U5oB+nv|pyJ89m($<5Fw75keM4R4afsVUFznDzJ1`hflYr8iHjf~oriZ&R8 ztzcW}BZJj>*nXGIvzp=0zJutmJen)~a;o$Qq|g!;q!@jWWxn4f*88NtD+;IAU~e~9 zV{vnJj*7>!01hk&UHpAjR-?JSF+`gKNhOLI91jp5q18U~1P34S5wSJ+)XUSKMz4Q< zH0z7y|EBi*4jIWnk7((ASjxi$KwJR?OZUB5cJH%8-|GvX!_SB*jM_Iq;oD;al8|A( zSb%&I4Y%FAYj-#%;Iy?3`VqRdmR~CkvO4JsuxXX^gY_S}(nI%rH92%$NAA(bC=wbW z7afS!fUxh+f7})Rvrr4fk38QCo4wiUSNQ$;1e}(xd$UIaw0Xp$zCzD8zd5vjbhitn zd<0U^Y=I=AGYSxg-ali0I^S}~xW$;D6Bkl{H*ve1a0l(TzG*B5&g=aq7e3oXX{Nu) zCm#VxX}@Y4-0C`-b3~`!HeT-x^q3q-$GFm>U*q9G2R`NWJMnP@LWE4@H}h`wSbQ;BURvQAPUAKr>dU1&b+>)v&dZQ3IMGDdMZ(p%9(>KB+@r z`Tw(A@js*I9W%LoG`UD0| z6T|`D+FNM)m0^{84hkXQt}Y;&gFQc-M5_YgHK+~Lf=U3F?d;)#%P2F@`+f&865FJ05m~e21ExdmzUsp%;>u)#S-(gG%g2%<9D#olyt>oH^1yI=zYd!7fZxD zlAJB*ZVxUT6}|PI_uI^ISKMehXww5w50nA`t0M*Rg*W5kF3y7m2J#x1V!0$K1fZ-t zd;q|qqTxGA6acX01}*h!g)%@7_T0#@QqB{JX#?!{8l5xN#}r`R00S9QoB|55!o-DA74_~&p~W0GHUldt7n6IXb=Y0%Bx%i z8YDq!$bP3V*eYQJ0GB*;!b5Wu0qpkqlcL2S8Y7d%66Rvth? zsy`^1u*;f{lY;Id13Q6f5J+U6Z_+EqB8dBd8hbXa=dgk!2+keYtr283Ztu0;%OsD+ zfxS5Y@F97Yrv{}D0CgXUB)bdK}J|KgbY|aE-`NNY>Xc7+r zy!r+&TY?Hd*a+q0OFnq-v9R}G>Ux07>P4wFZyNfE>y5_I7~Ah6Z5fI4&md* zl7QOTAjXm8TjW>0FMFvIrl3c$C-R9WycQCb#6BQCh-PkpSROQOlBT9(KJFd`;@V89 zT36%B#5b}Oy!$e303DzpxDUDXdj4>PFI$^t6snSHX0VySafIgtU~oYzr4v?5Z9Yu? zv=irZR(UymPNv^mJzX}2JOQY9OvE!}JJi1+2G|}w<9dRUShkQS$2r)BefCliOd`}p zr#=$g$~%e(eXu!bB$hZMqtSV+gkdse*i71+=v+?2WR5iHL1mHs*v;+9xq6xAyV62O z#~8rx(}shijrKtOL<%nB@CB3D!kf(n;ob7uY#+y>^CwcG2F?T2mEHz-tR%{RmuSds z##4SE8YSV;FZ4a{HxnY%9Yj0b5!fqB8X_dm(VBl=rkH9Q`FzAvEG64y8HGQeoj&~d z1b{)qP$S0T{DSA}x^G&``N+AF%iMU_XS&vgZ@}eN=qNN=zT*pz! 
zZJLt1pFQ;$e_e*rQB5KbnV6Zh>HKKI)_Kgo<1xZ++;83$xjO86U)lU+rZC)WlR9V> z$tdn0JAn%PtjJO3%gUoEsTbO*XitBh&ul^G74jL$Q79Eo4BJ*Yu|hC-tNPblf)sPr zn&HacJ6TtR8YqfYu7FZ*V@%l@VTd7A*T*|9!pX+rN*7ZclTq8%6K(x*pq_x7DWk4nafj#=0M|(FuT3)d zQIL3v(Y6l2!Q!iLb~ia?e{BTAH+e#yHK5=QihLHkuFCj^pS-vS;W1=vo@s(v3&5dT zE<`7acrNU7;fgYn5K8?~*JC-7J=f%71=l`Pp{M=v zg;uKq2)!c!9b|A=AOP~0)=TgWxn#`1@$@N;d`>b&-0|!Cs z6}aKVCbZ5ELJbvw5#1VcuDjF2Jf_%2e%EdxC{LmhC~6&7Iz4}FNaN4J-Iq(Ddsa#U zB5yth1w|lrFL~`&L}#3UJBA`KsOEP7fS(C6tZ6q=A+7JlHf^jCKL7%bdH(?dsBt!+ z@UElFpjE~Nutc-l?&-mj-`U}crg*Wj=f$Sz?Htf>bDdA;tbk?2?`XBB_#?nG*;aF@ zfEvZstQ_C75*tu`#!$hgQKSP%j$8z1g9)D_K_i?9i;fC~GZLtUz2H%VU!Q_9bnGW7 z&1+zV;IeQ@{@0Ha9uD)tMFfXCBKlgMil;d0Fh*9rckX?Dg-O+C7_&$K%%VAh;co}+tLG}#=+15}SM1WcP4T%21WJ%m``*v{_OWT<#%jRg_s75N2uBp9= zovEx#H#)MjsMz97&%v8TwIa&;k&Z z^+bWMX@bNd)E(o>tx50$1RJVH0r^YD;w`fBZKCI?NM1AMGCDMhsK{=NpFRWy@3ac6 zQNoSf^2wW`cT#Sk#J)oKvfz99vme5x3gE>k5Mx;knvK!tfz<+tb~?s!4X702qDVBj zE44U---{?J8CihDsv$A5@95aad9R=`9jj~Qy%Q-GJgEC4o(7sK~cW`qHVY6H@E z2J1skMNYx&@C)9hj5*AEfrWQ;3{0zB642A2Mj}k)DS$pUt?jnZGIaET)FO+IvCsC# zr78y!jq-#+6G*ne>(HY+n8K4F3BNS!t?W>M$pX|1wQ?QyER|0E#DgKUOqyg`Yu{!L z=iA^Y?dJErYP2Jl#DeEBDfC)TkQD)5nL(Bo zCdqkC%D*2H{{r5dHHa;2=ni%W(vHe7CN%*J8H|*Lo@f&-15a6)-8hs;JvjgIWzLfv zn`aqL5x0YuH6K*~-gTivV8cIAP7eY_Lv5=;ZJ;$}c26n&WO@4FNLy3V?YT z9(G<>p3*76J#@b{9kC9t6_O3$6F7qV`DCRp029v>bg$IS2th3eP|@-ye!;M0DB3x| zhnODI4vD04)9xIhLFg_ff0zPZY1~=2^s0U9cH@wsG zs}5B6jV%^PjH07}i+e;O0O$wX)wB9xHso$>%mjFb0Z`%N;`R|JSwRTs&bVxYpJ`Nb6@@TDs?(}M zUxJ`hjCMZl5E3h1$hV77jH!qpkA(b&o`;&^I8!NGiM`d#7S4Q+eKsK1V--2uVgvW( zBC;co)+~kfLZj12is?uZSvM_tt|GHJt|h|@%-_)|QNac`8+$sSLeTW%V)lpSNa9oI zF(HJ#*_4^3e)^(FC%9Zw5q?6t<2-MdY-*kGSEzjaNXO#@E}-On#=lKaBfvcALc znceb%q+Y1Sp{-@O3JO8@^Rlb+UuG87?()X4rowmz6i?^3h&C7VZB9!pA<0$+8SgtH zuxZ0lnolMa49@FRo|r48r&w1qK&@YLUT~ETRcvcTR2<hJUT%00EFp)|4Ph!VBdQT1A`E%#w@w2WUw_KEWrB5hfeL|BH(Tl>C31>5wKbm_^&WCW?nYm{P>)7WV^HM&&p5ocm97+j|r7L;}>_BB8>uHU0+4Kz3#ngpwRe zbbTuu-$?F6|B! zKys73gL#kw6gv12WMY&ob!X}Hk?OMioCaVR;G|wH+>xhHrSbHs$ zG=H@gCkpCwFHcJ>ROW%9VHVM8d34czX}D>mRrdpMrR4ja4GyVWac9_9!%Q{L|$Ya3Y(_54&kwzFfUYLr5%fa$+~{;`btaWII~jx@aA&MBUL z_x`1X0u=}VccuQkt%z@_Cz#J8mHXy^$@3x zV)ovCV8t@7f)&6+Yl%1*<7q#JJH+fefrF^d&JsIM%;%WTzuEEKjfQV!rz7A&HdhZU z+Sz6PdTW-yjZq~Is_PgHMAuCox)N~6XVk8t4G!Q~d%SbW<+>a`{v-UsWW;3s<1izJ zRvn;&*iu@GP|>D=SL8TgD3tB6!H9la59$ zNLTkLtv`9CD@-HTEEz+^+!+HqGX4RbY%H<7NVj7!W=+?2Jit!h>TIwtVO*-Pel9n! z@&-R>rBzHn1cI$7s22wF#E}T5%A`jDwB|I$>;4m4iDJs!{AdvdUpr!aCT zgOBMGeahkfb5u1++JuRB2<}UCFf&;?($~O`YU5hiCc_SJh_sOYGYhfarp(oD#VPWO zIPXv+Ai!HwHKH9CX$Hkx_3xIicy4wDJcrR1+n z;k#Db*egu5mwj$06j(7ch@L)}V{rluG%5d`s#d2fw`GGP`MAm2sDQ^IqYU}LyhOOGIg>tN4`SQ0^fMEXpHW6$@P4!h+Ft*B&4K?kAk45#G80tFOuwpRK+ zU8xxO?W^9Z-jl=?F`R?tc&|YzpP!rKP!*V{w(zZ&e|EZ<3o0Thhdm-;OTswn3Rrq9 zbO;kT0Ava#`Eo%bPD?aj_FwM|KM9ltCUq*ouJO5iUF877$9J{ZKO<4xDG$!=j_G2>!ibAj4%-e<7qmz<g51%VLr6Q{t(PlYIIw4!0jQmxnGGV+MS3D22(Pr*(@ba>t*Y47J$v2zTVa}6F^Cc|B703A2LJr{S2ENV?x8wBzK-AO`etb4q zQ>4;F`rSo6J{GLCg4adZtNX84r)2uQQR9n!|=z7!O}Lj}~0sbiT?WA4#RT?d?- zx77lLFH#rQX#s#c#r%hCNhy{F8jxY!^rz3l#`SolsZKV*_H*HfKesda=m){|hor}= z?kV7ySefeYktx!~nG4Y*RL99;VpE3NqTT6EpWeKm=j}(9vH@q740HJj{>J4PKStYb zI9?k__AWoZ{1|dCrlenEB9p!VYLe8$Y8inlmIJrI;BcH`q5${8e))r4NTjU!t0y#g z0sUSqfu$a}PGe_rikS+gL2RW}CCcgi+`?1~D8nYtHe|nVvL!vK!-Plg>)L^TvEN@f? z!9DNKdu+UF4(~lqwzo&sX(m`m$!zYDizuABl?+lOnB$7X4TEdA#Se)C#d)hFx>G@(e~U{yL$8-WPa)CO0Wn|7e839L=Ax^P2<}imF=-k@wOQ;J`)K{8g?X z^2iw3QhPk4l~g1~0-;41+pZnzYS0J=MPP7nOp9p<9KYxYlHuD&2?PIy;#@$-d{a;? z@Gmiv7Zc;4L#9!ayniS7GaM8wRJo?qTOWPPs<{l2MY822^Qg)1&qDyR;F2kz3q}b9=%E*V4x(X<^wwUX^B;wC+o0!npqzIexDhonzk$? 
zljNw-Ui`1K&Qu=UC$%nVMR$(^;K6q1+}O0w5FZBZ${~bz*;2#0oO?;%ij{~?O(cyy zH6?7VU-vxG_Snu!YqrjZ7gd-W-1_S?i1R;1f7-@BynyM%z>#I|RAo99XYywP$UDx7 zWX#a6knF53wUD=S4TJ&GkBFX2E%KG{${J|@9fx|1*a>T`CS}o)#4<3U5VH;iOuX%j zA<_n#0nmr+XR87@PCxC#ufUBd+is;onpx z#lvb23nEey5tVmo=Ab<^ph~3pkLV|W|7N4j!5J2d9 z)Qg_32vB_{|AsSt7s;d`u)v9t>YtR@XwYfaW8=5cP5k``U_ePfM=v%j%EB<2SmJ?d z?-O4j@BG>vm4#gZVlc7nkZyM=g&>2FskyV~*`bNAfXZys)|;q)aiOE6E_-Y2Nz83F zGWcOoR+l!{bn>lxt_U^X5aO^Nw|TRm z^lrO#js#frX9q;nMrn)SLa4@py)Nu|CtNpVU{l|;(6&(s6lMS=j#vyof!0j6g(L{J zjej{b{lJYRXhF>gAe;9DHF>_ZW>Clm#xf3$ek~@=$7&3eGSSNz;(8*Q0{}rKkvhZG zZWD>CpZ2Uk5 zO1ri};d61`7_xcEkDdG(T^0#*_b0oQi4Mw|8_HKn&8Y1lJAO@h8CQ@L3DKoYUXT~` zb8&=c(!@BCwFbOriL&qfS(k_CjjpyKM^>*YT zp8hg1TH35cKlu(pOpWba&8WyFT8w(Ls|{}83hnANXiln*N`Kz#E7_W07mQCiA^uaK z9&Mk;RvGj$t<#&p$J3LQh1GLsAw5r|qrNf%^4`ZDImd{m!3`V+&HiFZ>TvAUpa%&S zPTlFyI*6t`RD%NPs5{Z$G{{u`+tR4BXE%N{azb_C>O^w}sDJ+I##TWi5vT*+JgF6o zis+YU2~zP&MNd-{pFHbNa@_9@4~ig8a>#O{X6wxtswyAL<1(}FJQ@3HJC>Mk7ykQm z!Y0#Dkp}8Ue?pfhuO?y87?DG3z(^M%PFeKveRKqt91gnPj_V7)Ju-ZcDg=JSE+uiFrt_H+E7mJb%s!WB>QZEl zgdz-7qht@!A9L_feGBKrEDIaqpdqQFA`+^(iGon+k$!v9MK!tcHY496uum5@>^#zX zcek>IWtib_u^5906nJ|=YY*}Q8EMCmdoCQOBPm_S#G>$6X`W|rNlFW2=w^%h`344K zs4%_leCqn$a}yK4``CF9E)6$9m<+Tjt#pPQ5%>Tr9w2WG0aASJ7{mbhy~;GSU2OlP zSTs9$wAd~>W8^1{Da4~XfQ$N7@&xyJj2sE>udL33r&t%9HWO6S-D>#gQp}-qfJ0s) zSp`DKds~3Sbwakr&8|3>FBEnDGZl7(0V<6^S1$qN0OFO)jq!B7@Z;^J_V)LEdo4jJ z;Vx|beMuqR?9=(6#ST+N*lAU29kXFf5}Zxmm&F)dKt6Wk2`oTzm;w$lm1Rx_SOw%4 zT{s=$^+2OIMBT8d08)K+%4Nnm$RU+R2=m#d)9q}p-6LV zcgied^B2asj6zpS3}cl|0I)Jss~z_iTks}he~E*hFj42QKf;e#kJ)`eRmW0TC={EC zYXkQZrXVt2s?t>7)S`e2j$$}|v!j_Otk+;8mFuEoWN_Q|9oVF{xiyiONS%)8I(t7~#PdvNJ+N{;uN2ii|XSs}G!PpKUEK zFhHq(V?u0176Ap~l6JNVdPwFHEniB#-d4n5USE(J_hdY-Q+UA77`jtX8!aD3Pq6XK zSa3afTV?F{3($_(;2pzjvjZ2ED5s)UY2XsDmst_(3tf8MpCU*rt5Dw`0geP1KcPce zOh5M!c7Kc2wXd!(FqRvlme_qKhsfT`Qv(cywe?gnH0|9wJi=HyJ|62&Djx%hz)sff zNWp;q*Z7_TjgOy_U<6Rn`0Hi=Nt&Bac9hf&O7y5)6c>pQH| zI^;lz{9J*QpnD0ue9m{PAgq}2{^pVKJQzo>^)PSjzrOX-H}Z{rq_!HD)>J#pfEoJp zI%`bY*OdFS<&WEEfu-q7rMr&%Y)bm4-_7m^_JIXF_6u@14NJa*%ImP0DKA1 z4oi|-J{%taxe*_e5`*GT*5tzLgHOP1M=IpOb~FVVJo7t1VHo}8qdjx&EXK-43Z5bTR;;vU0t7u9#!%@jD6q=~BVz*-NvGc)aQFg$`A3lBlf41oa(eS)Br3T50Z?5 zEb+D)p_((^>Cduk3uyOcd{*9HR|riwXu4KC1sD) zJ{e8GHy{Y`C#f;8=E4+)@QJDj;N0wgoH3kyB$n}q5wwPzpT0Pr5E*$&oG=p3a%#6~ zi_L16*$m2*J5?rKtqs@)s7T#F5|!50{&-JaOjRJr7OKEo1)M%Z$JMiwiXpT&pb)z^ zfYXadfE*0mCh4Ct)9E$RIhR3UE|I*K{E^(->s*9?z6&5??VY1F%K(n9 zPURH9J2r=&Q%s}R0B46wEAo$~Zs#t3S}I_4G1;HbgPOCTF{~F6l$~ob3b4S!nH4&| zk}<9=1yxEH85HWS6O7AAV1(ZRWmABT52>4?-lOvFg4{S7;Od&y6N(n;UH^8~30e~s zbZ6NVFbSUrhB(kB0Nk>%>gMhU`e9V&>A@)?Z8Z4}M5J^B3H21hI+`*r!O#`bF^K@G~|NeNZYd zF1y+g>iy-31zf{<*h!j>4bqS@#|vd^sc8s9^!9LiR-B3z(@80wSuc>v5frZnJV1=+ z7QMaV4ExPvJ!ZT+UC~XNVKRY~xC`8);M`UfUF!HBG)hU|JlFR@(n`cM0FH5)2jhtUool?_QtSwROv9Lr z;nSO|33`;iW!Aoh8h9x%NMIi#NcRm-C4ycw){i@@JF7AK(Nj*IHI3c4a5xM4*2zFa z0xZhPI(ec-UR23d4>S`>u_tYBSkfEE$*#+29=$W5mGJbt%IuBVy@I zrcvq*mMjh6|IK=o0#Ocs{HPmj6-+Y`@T~)?)G4fR$Z}#1bMVePDFk?_6vC-A9HyzSBh zJ3r{6PD;)c2Bmh}N?!6EXNNSozW59rhdR`poC~)Vh$a3)aARThHk)?1~(A%9+T|W#HOCxiI}hc>P9Q z3)lP7`>Gu10|v~PXr#>#r7~NL7H79hmoaus`~k3~rdwN>?TGz9NW?Lc?(&s{)Qh;IOne$^)!d$|SA#HCPoUioQXQ1AKdqM^=0nZg@6_>J zP}M|0$<7@;-N(HO3mu=$A5`5%$s)|k#X&Zs%=H5OK>d{V1!4Pbm-f?&bfKi!#l?p&32au<{zvs zWL!d*C_Zvc%tRn36tdVXSlG+ghMkTlB%7yqBTM5WOAQ8GSI22k=prw8Zsyqd3Lc3v z4*e-eLR)f6XFi{#C>5Vdj(!_mJG?cqeZ3L4Pha+g8&*kmYO9nE^d*2faR zp7}5w?h&jbX{>q75hOjuoRlk2))4%EvjV7cCkz`R*O%LrOZT?|Odtc1xHJtFW^E!& z!Qj?X#|n`|p^Ml84mh=&QXN zRAW9Sl`)(cZ%w>E1#PszAbn7x`T(jo-2H^d0;=e`p!shPW*`h>57ry9o4w#{)0^yu 
zhWNm0Q6@zhq`a1o)lR`)fYQ*poLy14 zNgTCS<2Dcs7Tp?BvUKa9_sXDP=NiTSaI>1e!VN^z);4s3rw(|6LS!$mycwic@uIa1=E27c^y~s}Urxb48Zw1$m66IE1?D=` zkCAgjp(8^)1NOq=YVr}OeA2M|$VLZvc!asvnVqVD4erBc5^|W(`KY0NYdp4&pK?L) zO-@>kP^kSyxmOBpo3Hda&kuncpk9Vd3{_cnr{lN&<+tgFYdzIr|KP!b^yU|Nk>?f# z>J>C@i@!GMn87sv?H4HR`N{3<=4xF*tN$Cw0|Tkcf%W$wMD3ZeW9Tx97O6WNAwsMI8-84ACNxJ8&O+H$%7Dai);a12DbX^4totpyx;PhwB+ddzy}I(I*++!3i5_Vm zfc?={rkRKe{}1U>@$E*lvsBO5-nh7Pr6`;pj3wwu#i@ z&}{VJ_9ze=o_!E+xu6t#I??y0KaT{bs?fTbnPCbGIp0sdS;(eYgUImmhPnf*>u1B4t8lmUe3V%Qs5+|k+X1ii9V_}DreQVqS3~N_M ztT&Zir1TuqMm|6s4vuxFocZd~Y>&+}d&arzJs<~GimT2CaEtD2DU_k9+u-JviTT3G zq5!lj-gjg>-;=Y~>rRGHD2cS)_bSd9=&24CCDmXvdp_UQDL(??yu}6g11tqmzHF*R zsgjl?-}k9E#kr~P`1EjH4s*2gXO)5cPgQ-^X+>MOsMXZWx}_$=taI#D3}29dVzm)8 zG}y~8d8wA~EF$II7>3FoFivCGlk_R^AHo~@UZsf^k_E`MFef~J%JvcmD*N&6=w||n zY9Y>t-sr{|#NBRrtWyPgHv=lzfeUR<@*NgvyDx5)Xn}tNwu~*-qA%0&9TJ{7vTApt z+jXEYTmT6)X|27wns9~ySg2exV9B~6J-{2fbE~G(BEreyyw>n7?ZG;|XoYn?A1XHK z^ukUq)oS~-&&8MIu|AguzNL$&V;pW$$v7*xfpo}EN!Km$vOs~aT(?4y#G!{)7c&$t z4hTZuPELVoSk7;Qhn$jYGi<;{u>T{&WZ_&AJzA%BXe=wxa?Q)s={^;Eu!PLbd_rGV z3H1{fjRm_WCbF*Wd7y=2uyRcg>UX8NL71|T!*zZ-1jr6bNKCJyS$iVM;LA_ssc2;+{T7qDjJ9kprP-4!ocHsp>K||D3oEMF4 zutSUS*mmg!gIfAns>21^5E%YV{CcL^qhdtt9Vc}qLVGA3t;qeeax;MyI-`R5LgPI} zDAoKI3NE1nD)(Udf6WPX+I$gK1aw>i%IJuZ5BEJ)^XL!(M=UWqa96 zc#G`k&s>ko=E6*T-h>FDc-c+6=Kup4W$2dpACg8OMBzI;^L_y?m6`*wwO;iK^xYfL zYc~tN{yleaxl7iR;>n|yPuy+K7Pc8S`t+E4c-KGeq}owLhQ6{n>h~|=?t*g>8Z=V= z6F5Tb2=9^YMcN! Date: Thu, 24 Aug 2017 02:58:41 +0000 Subject: [PATCH 098/170] scatter check in --- paddle/operators/CMakeLists.txt | 1 + paddle/operators/scatter_op.cc | 76 +++++++++++++++++++ paddle/operators/scatter_op.cu | 20 +++++ paddle/operators/scatter_op.h | 60 +++++++++++++++ paddle/pybind/CMakeLists.txt | 1 + paddle/pybind/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_gather_op.py | 3 - .../v2/framework/tests/test_scatter_op.py | 38 ++++++++++ 9 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 paddle/operators/scatter_op.cc create mode 100644 paddle/operators/scatter_op.cu create mode 100644 paddle/operators/scatter_op.h create mode 100644 python/paddle/v2/framework/tests/test_scatter_op.py diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f466dbc79a..f0fd12f1b5 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -47,6 +47,7 @@ cc_test(gather_test SRCS gather_test.cc DEPS tensor) op_library(gather_op SRCS gather_op.cc gather_op.cu) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) +op_library(scatter_op SRCS scatter_op.cc scatter_op.cu) cc_library(net_op SRCS net_op.cc DEPS op_registry) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc new file mode 100644 index 0000000000..cf01ef6279 --- /dev/null +++ b/paddle/operators/scatter_op.cc @@ -0,0 +1,76 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/scatter_op.h" +#include "paddle/framework/ddim.h" + +namespace paddle { +namespace operators { + +class ScatterOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + framework::DDim output_dims(ctx.Input("Ref")->dims()); + ctx.Output("Out")->Resize(output_dims); + } +}; + +class ScatterGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto Updates_grad = ctx.Output(framework::GradVarName("Updates")); + auto Updates = ctx.Input("Updates"); + auto Ref_grad = ctx.Output(framework::GradVarName("Ref")); + auto Ref = ctx.Input("Ref"); + + Ref_grad->Resize(Ref->dims()); + Updates_grad->Resize(Updates->dims()); + } +}; + +class ScatterOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScatterOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Ref", "The source input of scatter op"); + AddInput("Index", + "The index input of scatter op where Ref will be updated"); + AddInput("Updates", "The updated value of updates op"); + AddOutput("Out", "The output of add op"); + AddComment(R"DOC( +Scatter Operator by selecting from the first axis, + +Out = Ref +Out[Index] = Ref[Index] + Updates +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(scatter, ops::ScatterOp, ops::ScatterOpMaker, scatter_grad, + ops::ScatterGradOp); +REGISTER_OP_CPU_KERNEL(scatter, + ops::ScatterOpKernel); +REGISTER_OP_CPU_KERNEL( + scatter_grad, + ops::ScatterGradientOpKernel); diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu new file mode 100644 index 0000000000..e6a6fa57d9 --- /dev/null +++ b/paddle/operators/scatter_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/scatter_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(scatter, + ops::ScatterOpKernel); diff --git a/paddle/operators/scatter_op.h b/paddle/operators/scatter_op.h new file mode 100644 index 0000000000..c2db3ae37c --- /dev/null +++ b/paddle/operators/scatter_op.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "gather.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "scatter.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class ScatterOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *Ref = ctx.Input("Ref"); + auto *Index = ctx.Input("Index"); + auto *Updates = ctx.Input("Updates"); + auto *Out = ctx.Output("Out"); + + // In place output: Out = Ref, Out[Index] += Updates + Out->ShareDataWith(*Ref); + // Apply ScatterUpdate: Out[index] += Updates[:] + ScatterUpdate(ctx.GetPlace(), Updates, Index, Out); + } +}; + +template +class ScatterGradientOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *dRef = ctx.Output(framework::GradVarName("Ref")); + auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); + auto *Index = ctx.Input("Index"); + auto *dO = ctx.Input(framework::GradVarName("Out")); + + // In place gradient: dRef = dO + dRef->ShareDataWith(*dO); + dUpdates->mutable_data(ctx.GetPlace()); + // Gradient by Gather: dUpdates += dO[Index] + Gather(ctx.GetPlace(), dO, Index, dUpdates); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index abb9c248ee..37e186a408 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -4,6 +4,7 @@ cc_library(paddle_pybind SHARED DEPS pybind python backward sgd_op gather_op + scatter_op add_op mul_op rowwise_add_op diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 8fa8be2cef..3bc150ccb7 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -47,6 +47,7 @@ USE_OP(scale); USE_OP_ITSELF(identity); USE_OP(minus); USE_CPU_ONLY_OP(gather); +USE_CPU_ONLY_OP(scatter); namespace paddle { namespace framework { diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index fb4686889a..661ebd8964 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -14,6 +14,7 @@ py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) py_test(test_gather_op SRCS test_gather_op.py) +py_test(test_scatter_op SRCS test_scatter_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py index e868983042..e3de3fd0a1 100644 --- a/python/paddle/v2/framework/tests/test_gather_op.py +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -21,12 +21,9 @@ class TestGatherOp(unittest.TestCase): class TestGatherGradOp(GradientChecker): def test_gather_grad(self): - print 'creating op' op = create_op("gather") - print 'creating op done' xnp = numpy.random.random((10, 20)).astype("float32") inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")} - print 'correct before check gradient' self.check_grad(op, inputs, set("X"), "Out") diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py new file mode 100644 index 
0000000000..e7696844d5 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_scatter_op.py @@ -0,0 +1,38 @@ +import unittest +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op +import numpy +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator + + +class TestScatterOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "scatter" + ref_np = numpy.ones((3, 3)).astype("float32") + index_np = numpy.array([1, 2]).astype("int32") + updates_np = numpy.random.random((2, 3)).astype("float32") + output_np = numpy.copy(ref_np) + output_np[index_np] += updates_np + self.inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} + self.outputs = {'Out': output_np} + + +class TestScatterGradOp(GradientChecker): + def test_scatter_grad(self): + op = create_op("scatter") + # test data setup + ref_np = numpy.ones((3, 10)).astype("float32") + index_np = numpy.array([1, 2]).astype("int32") + updates_np = numpy.random.random((2, 10)).astype("float32") + output_np = numpy.copy(ref_np) + output_np[index_np] += updates_np + inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} + # check gradient + self.check_grad(op, inputs, set(["Updates", "Ref"]), "Out") + + +if __name__ == "__main__": + unittest.main() From 1dc62cd21ca8699c6740d71cf984a7f5d589b77a Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Thu, 24 Aug 2017 11:28:48 -0700 Subject: [PATCH 099/170] updated doc with implementation change of trainer --- doc/design/cluster_train/README.md | 25 +++++++++--------- .../cluster_train/src/paddle-etcd.graffle | Bin 5765 -> 5557 bytes doc/design/cluster_train/src/paddle-etcd.png | Bin 57495 -> 50387 bytes 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/design/cluster_train/README.md b/doc/design/cluster_train/README.md index 74961f8005..177a5f5d54 100644 --- a/doc/design/cluster_train/README.md +++ b/doc/design/cluster_train/README.md @@ -54,17 +54,18 @@ The life cycle of a single task is illustrated below: 1. When a new pass of training starts, all tasks will be placed in the todo queue. -1. The master server will dispatch few tasks to each trainer at a time, puts them in the pending queue and waits for completion. -1. The trainer will work on its tasks and tell the master server once a task is completed. The master server will dispatch a new task to that trainer. -1. If a task timeout. the master server will move it back to the todo queue. The timeout count will increase by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, so it will be discarded. +1. Upon trainer requests for new task, the master server will dispatch a task from todo queue to it, put the task in the pending queue and wait for completion. +1. The trainer will work on its task and tell the master server once the task is completed and ask for new task. The master server will dispatch a new task to that trainer. +1. If a task fails for any reason in trainer, or takes longer than a specific period of time, the master server will move the task back to the todo queue. The timeout count for that task will increase by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, then it will be discarded. 1. The master server will move completed task to the done queue. 
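The todo/pending/done bookkeeping described in the list above can be sketched in a few lines of Python. This is an illustrative aside rather than part of the patch: the TaskQueues class and its method names are hypothetical and do not come from the PaddlePaddle code base; only the queue transitions and the timeout-count threshold mirror the design.

import time

class TaskQueues(object):
    """Illustrative todo/pending/done queues with a timeout threshold."""

    def __init__(self, tasks, timeout_sec=600, max_timeouts=3):
        self.todo = list(tasks)     # tasks waiting to be dispatched
        self.pending = {}           # task -> (trainer_id, dispatch_time)
        self.done = []              # completed tasks
        self.timeouts = {t: 0 for t in tasks}
        self.timeout_sec = timeout_sec
        self.max_timeouts = max_timeouts

    def dispatch(self, trainer_id):
        """Give one todo task to the requesting trainer and mark it pending."""
        if not self.todo:
            return None
        task = self.todo.pop(0)
        self.pending[task] = (trainer_id, time.time())
        return task

    def complete(self, task):
        """Trainer reported success: move the task to the done queue."""
        self.pending.pop(task, None)
        self.done.append(task)

    def check_timeouts(self):
        """Move timed-out tasks back to todo, or discard them after repeated failures."""
        now = time.time()
        for task, (_, started) in list(self.pending.items()):
            if now - started > self.timeout_sec:
                del self.pending[task]
                self.timeouts[task] += 1
                if self.timeouts[task] <= self.max_timeouts:
                    self.todo.append(task)  # retry in a later dispatch
                # otherwise the task is likely to crash trainers and is dropped

    def start_new_pass(self):
        """When todo and pending are empty, recycle the done queue for the next pass."""
        if not self.todo and not self.pending:
            self.todo, self.done = self.done, []
            self.timeouts = {t: 0 for t in self.todo}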
When the todo queue is empty, the master server will start a new pass by moving all tasks in the done queue to todo queue and reset the timeout counter of all tasks to zero. ### Trainer Process The trainer process will: -- Receive tasks from the master. -- Work on the tasks: calculate and upload gradient to parameter servers, and update local model by downloading new parameters from parameter servers. +- Request tasks from the master. +- Work on the tasks +- Upload gradient to parameter servers, and update local model by downloading new parameters from parameter servers. ### Parameter Server Process @@ -119,8 +120,8 @@ When the master is started by the Kubernetes, it executes the following steps at 1. Grabs a unique *master* lock in etcd, which prevents concurrent master instantiations. 1. Recovers the task queues from etcd if they already exist, otherwise, the master will create them. -1. Watches the trainer prefix keys `/trainer/` on etcd to find the live trainers. -1. Starts dispatching the tasks to the trainers, and updates task queue using an etcd transaction to ensure lock is held during the update. +1. Write its ip address to */master/addr* so that trainers can discover it. +1. Listens to trainers' request of task, dispatch one upon request, and updates task queue using an etcd transaction to ensure lock is held during the update. When the master server process is dead for any reason, Kubernetes will restart it. It will be online again with all states recovered from etcd in few minutes. @@ -128,13 +129,11 @@ When the master server process is dead for any reason, Kubernetes will restart i When the trainer is started by the Kubernetes, it executes the following steps at startup: -1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count. -1. Generates a unique ID, and sets key `/trainer/` with its contact address as value. The key will be deleted when the lease expires, so the master will be aware of the trainer being online and offline. -1. Waits for tasks from the master to start training. +1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count */ps_desired*. +1. Finds and watches */master/addr* to get master's address. +1. Requests for tasks from the master to start training. -If trainer's etcd lease expires, it will try set key `/trainer/` again so that the master server can discover the trainer again. - -When a trainer fails, Kuberentes would try to restart it. The recovered trainer would fetch tasks from the TODO queue and go on training. +When a trainer fails, Kuberentes would try to restart it. The recovered trainer would fetch tasks from master and go on training. 
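The trainer startup sequence above admits a similarly small sketch. Here kv stands in for an arbitrary etcd client exposing get(key) and get_prefix(prefix), and request_task/train_one_task are hypothetical callbacks; none of these names are taken from the actual trainer implementation.

import time

PS_PREFIX = "/ps/"              # one key per live parameter server
PS_DESIRED_KEY = "/ps_desired"  # desired number of parameter servers
MASTER_ADDR_KEY = "/master/addr"


def wait_for_parameter_servers(kv, poll_interval=1.0):
    """Block until the number of /ps/ keys reaches the desired count."""
    desired = int(kv.get(PS_DESIRED_KEY))
    while True:
        live = list(kv.get_prefix(PS_PREFIX))
        if len(live) >= desired:
            return live
        time.sleep(poll_interval)


def discover_master(kv, poll_interval=1.0):
    """Read /master/addr; retry until the master has written its address."""
    while True:
        addr = kv.get(MASTER_ADDR_KEY)
        if addr:
            return addr
        time.sleep(poll_interval)


def trainer_main(kv, request_task, train_one_task):
    """Startup sequence: wait for pservers, find the master, then loop over tasks."""
    wait_for_parameter_servers(kv)
    master_addr = discover_master(kv)
    while True:
        task = request_task(master_addr)  # ask the master for the next task
        if task is None:                  # no more tasks in this pass
            break
        train_one_task(task)              # compute/upload gradients, pull new parameters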
### Parameter Server Process
diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle
index 1b6611bccfb0034a10044f2f175b56c46a98f1ec..b4be06a0b1c6ba4a84475d2e5d6217b6c259bdc5 100644
GIT binary patch
literal 5557
(binary data omitted)

literal 5765
(binary data omitted)
diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png
index 4e5c3d886e65a654d734788afdabab3fd15e0632..dad67a277296ff1719a968abddafbcc1277721c7 100644
GIT binary patch
literal 50387
(binary data omitted)
z((oKGl$g!{8IR*NzqhKT#DT{xeXj8iwiayuy<+h>-KiuRg*v$Mrzbbx`z-3NCnl6k zTM<3#36|Ak*_i|}Yj{2l2A;1&B}GuvIuRDH2*M#)9)7YH>EZ=w)II@ zHjzlUXy?#9$or#Pj&Z2hi2n4O``+)EAPBWZ%SF1GDiCa%pG}f5Ldz|zkFcj7Ls3o+sa*lVXKTI+ibDZNP5cG z6N%v?cf=jq+#gbwTSLYKw5dfNwjC77p>KO&44hcY&Jm-?mqqr}vsGWo?R9|S_`)Ne zhUZlJHmvfu*fJN(iMd$itwc^#>FbkiqxEFu_C^8S$ZsZSWuNFq9+}a_;>f>x*FFBk zEO0sHM%!WJ;jGvGJL=At6w56N_Hu^A>5R({ZWip7%mETa2L)LsLLxt)U)+#m&WFPZzt#6o+6 zC^alGLs61~h2`hBP34C4$kvQ7{XG|{ck|!gyzp2(TQ2J8MxTI4hdpsC&QH-^RI*|= z{@#Y2(m7@Y6z2J7(I=f(wB-VLlZv7K%$A2__3NXS>cXWnj93f$6to|NW6H*T`MT{h z9Sgjo`&1}sJG#VDjaZpKtD*+Ei7sAkOAg0e^Ou})c%V-zc57wjp}Ge~Vww6woTEAV zXN4-7=8xfJaVi=YhjZ+~w!J=54BK?XvwhvD28x?YMRBEz943p&VGL(OOp0 z2F7NN#ogV$$~k~l=#Dsh z4Bknzw>_=7#+*~swFn2(Ikx5{$_nBTmIm^3JSeW=gWDZd8YeyutP5^GWMV~$b~Ssy ziB{Ns>BU5H)5I|>fZF-_HP%Xuh_Vk`gT^p9>raBB-e1fyDL&IrP+fN6A|*Ve1EJ+NX9{IahHu*@9=%_jx^@94ST8aoa{GQ#|oA zX^j40li8VlS=v2II5CSl+ucpLyna-xj7L|bIm_S!nC3Lv9=dS{GsP_>x_L!>dwL6~ zm)ow==ZhgaIBg0`6P=38%=tjJ zO1hs$){vSV8UKK8^xE_J70rR`kz6y>sR!DY&f4xYI6Ui8PaDkn7F9X1fF;kLg}qpU z!p4_Qk~^Gw$4IK7K?jRQ7MJQnG{J41K6_L8C>=dD3lZfA=~vrQ-PI23v|?V7+toPa z&YTTtSn@~DKZ!+=DhOH~WzSx{B312v0|&Fx6WG*Z6sU%?#tFr}z+;OSxO~c3DX(?9 zdT1<(BMi8cXiP8I^j^>7KuLYdrq6RW6}`7_JLv5wGgJjY#UCAfeVwo-*MwWq6RI5g zA~@5JjvTBKeGJ!8z5T1ce3&hc%z#)|{Kfe5ndh{7|M4Xbi0q2{KSCCEY_$h7#4HIC z4Z7bTCilI-oW|!(wfY(a1ie$mijt!_=tr6AUOiuPg^3(2oxU_KOtT7-B7(R)`Cfn2ageLzRQKTk$&|PGt;6*f&rnvd;cV+Cy1I1)V{Z$^etTprLH3cM z8mO;XdXA$4kJAm%5|th2HUR3b6Y%zBO+VTEyyO8495_bOZ;?Tg03d|UIhx~745MBB zQaSCPBB5*zTz7y4Kn$!1oSi>}SOGVOQl3IRml!gq28uC>il={m&&&mZhttY2-2|We zGxS##B)OdZFAaNmgrxiH{Q$xjNszkjoIH$yAk}(nY!RSfw&S7RTkuV4KftXgkk)(S zT*V9WH$uvC<|v63rE$nDb&$@=!QhrmV}9z>=}PFFKn@XNo<|K&S4KM^sZ%_*(8@>4 zQX*6qA-7QTjXcm)i(4N54g`pcDDHP^{JO%o2_tnzA6zoo@F^%%$oa1J>r>w!sDG!$ zU-t_~P4tPz6Qsyckbu1{y8RR!0XO$`5UBb%P)^kq96$<0qIzJJBe!usr~5j*cph?6 zGTgZo??w+#7MmHHpGnU!H#2Ja0(P#<`sT|1P9l*6$z#g4jcOQ;qZ1+x1fgXNJiyPv zDJ9V&F7^*go8rj11a3H8Hfh(m#kKeX%y$x*HeCm>Y-TdecCDc_3KL-ymU< zN&9TK>-x4B&nZ{`9PVq-!!djglOX!mu>|b8@5c{BMG&Uzq`v%#d7R_SYqM{VpBAB! 
z#(3spKkzfmo=v<1(ld!dWhbkzf<# z4!F}n`5J=+u16!RM3pIB(=?r^Y|M0o_({)Rd@=8=UnBS3mJ{kz$AQ z?#G{oE2o{I5w%kcNi9hRVV$5;kpU7kCZ!Tze1evOi!^_LSK;}n#ym^eplLP&76n2r z#-2H-dmnP$sY4uvZ$q`N+WPe*klvSR=bKfWkT^Z}xE0Aw{t~Wvzj~s z?PnlR(~VlRkE0ZTH&UUdMi-jX_}M%!1C!}%X#(rcX;sEc9K(9Qp>p*V_&a#+2(NV5 z^Go7>3B4m5TGFwf0B%Fu!`RVv;wtFo<@ZBiLc?fmWL(52G7c)Q<{2l{s&}2;fQZ^G z#H{fhI9cC|Pb)H$G;o|1{wzwRc^#?O&~#CnquwcH?+YY0ZB15PCFVX8EvIvXxF}L1 zP1$c=z3Vp^)8aLJp@P)jxs*?M0#D+=HmemTj(%NGQwP{RJ&pAMmY@lDdbdKH z<7Y4QzbjM??&%P|9nnfP&Q?;Xw6*TK2V^-)+QB+?7<+zd5l&Wh!=y}p2 zeT$R6T5;y6wQy^^8Ht@=W_^BU=#kfAd;=vQu zO7fYL52+!sMW%RFo6goRZ37?aN*JAEd~wc}VSUCh0sREMH0_H3MCkE;;v=-A#Yt=p z383z6YY}oXlaV(LzC%>Qv>fxg{`~TB9A`$I+q})e>;;j!f4dw{9@ftMK)LS{U2Y!b z8@@&+$x5kfAy=v1NXYg1_!pZorXDTI_EZwa*B9AiWWt9;O#4B*6*hhwRXJpoaO%EW zU9!&e?sD5}LF`avZ+uFdu(JUKWeefZ-%L| zkGKW!R8$NYn7Ls(l}Wek z(>~*H&EW}H)J@`YZny4P# zQC2t&^}R2IBNbj$?`aCY^*^tWl5BI4ZdVMzxT*Wdd&FuX$@nO(8+@Wa7n0n)U<0-C9KccbnR_dkb)=n2V zSLHO@sNulKHIqzY9YICjWp=16=rdipKtxhubzVs0-u~HSh54q`v(Qn_;ZM#I+cw(} zv#?BkZSuF}<|2fH8%2J5$oggJ%yb1t!&XU=Vd>ElHnljmk(;d=U!eCQUe`d_rAU^a zskgOE>CB=^B4gG?@F)%<)KfoMM4RvpnAgWn23K$~UuKfQb)~#P!zgvc>FSo%-!A5E zq&Z^pM3fYZGk9ZlfVr1djn$D;f?Qo?aS9L(Y_}KftCUOaE)oZZ;*q-Jy)kMtA;$0c z(7C;5D{m%Oz!+x}>Z+Kem1AB1K`Oha@dADvr6$G0GhrDb(c=B=j9q!SEDWL!__w3S zsY9;6J9LqV-s#WKkl7{4V`Dw@F2Uut?#xlr;EcuEwaz7u`_3jl%!L#yBrG>kW<%(K zq?@7rVxccx&NQ#}W0|Io{O5YG;jqf#yPIm}8tBTgOvm=JF=i9vGlixGo~m=&#PAO2 z)r#7-t1x7iW>y=Ljnfqxu3(Y5 zw?z@>bx$iP-b}u@cKliUbyEcMqkG#X(b9|hWbD&gZY3nsR;Y+b?D%t+3m#UC9OBq^ zcwIuK^ zslZ>?cWS7q&2*V|BOf(*JzB|j#`OKsAxR$1ws--h2|ACC2a^zOXob8cd3(AW$&4iu zjg;6P9JqPZHR^Kl_`ZGeX{KEjgkh={O$KDQ2xqmjq|y0VXN%X2zlSn+Y=vY#_<%7z zc=IwY_-?nO^EdW0JcjM*-Yf!v;!+-L4)PX>&Z95X_wS|)+;sl)^=fKPP?MFDEmKbn z&F=kVa~BgQ!X;{3fwKl4#B&^EWmn^?hCjxnB#TkbXtO^qLU-vmp*tXC7X6V+58ZnQ@4U{ z2E*F%D>2-8cmKN75})7L70L>oV0V>l6S@@3j|o^$o)PYT`l41M^;^}6qI$SLn1x`> zyE|LtMzxfnP95icH-US+?@^U}ndae9VxQ1K>Pk)`%{Y3Dp7_PM->43fnMrVaf(N|Y zFf#{^dG$J-A@_>;bfS60aIKQf9eRHa9m$8&DpW_S@S4sehlw`vu$xa@Y*$HV|0Qgp zCK_id@|ZqQ*rwj}dl$Fcz zDLPoFBvaZMzOS1)z{-1nk6ZKgR@h+7)n71MYe_6IG_N9Wb8(-9sy=Es^y0V^OVYAz z&t8~gw3auqn2Kck;|52aMC1P2c3-bcs>1OU7*v|Ru`<)0&FV^hZY_!svmN`y?;D}G z^(-CyxT0~c_gZdJ2xRpLRU6@qs0J%7O~vT<~Igg&`bTyTdy$w&WIN z5|J0(MjDs(PdcB#(UBYUBIIL&N6^qN(JmF~!;D=nCXeJ{Ej&k#a>K_XTK|Lt5d^|N zt0V3>Pvt}?ZtA~_Y|1SP)EPC9cXjTfUrhY_nZFBv8Lnuf(^BeiH|P8<43m&=Y^mXzgf!{Tub*&4Vl7fRqRoUBZT$c%nCFGpD>_ap@IxFSerX%%2b9R*2FVXD zC0w3CJ~=fK{7ZHH&|U)p7&1~PyWL^$B}Y4I@Z8JKC0`-x1yKzJ4InDFnx|*AHCe9X zKmm9Z1V_FQ@kfTGK-uz2$snZz)av#A# z+y$M_UQZ2pRNY1-rdH#=$;Fg3zjlMsw#dNJ>sI-0m5+-L5Ab{sS_1w&z7X5CM>EA_4LbgGe@mI5WuRqYm$%tXu%;nn8s#=+tIFl#Kp~kVw$i zg`a>l|AL3I%<6HLjfTQ9Si$8E`jdC=i-C@nQ_d|R!yK0bsbO52^E2waRfiG&X=H6H~6zV^myec>gc{EGl)6qOsmdfhQ;~cZj(7ECWiNMTd@9ovH@*?Q|Rd$ zoKa{*A9CtnQ-J!=8A{&HfJcbxOJAXqG=^$RiGLUb5Ulx6n_oc+Gb>X~Z9iLF_#x;- za{{vT$*wP3z=zR4L3W0yGmxKBsE9b?pR@={L`Jp2}YTEn74yS z?Kgt$6Q!LX945LFbO?DdDZKr;%x>}g`87q%^kKCaL_W`tWc38DA^rHr3Z?AqV7!oG zRtQEg71GX4PGl@XQy$wAA#ht+W@Rq9jl<9Zbfxo9-%yrGB{iqodi|Xy)10U1)L4wv zt2AXeRDn=nqRV*^&3STj_Sk^zt;YVv4!dV7VLnBFKuEwksK-{xKcRf0mX@fo<{fo- zbZHDpGG!+D#zIwcV(Hm*{+-||5&~4>_cM{2X!C7?IH~a9CVHR1d$#2;c3~9#EcxS& z7q1{LIg&K=9zxsj{M z-uL&O(7kKn2CG1Q&_b?d*I5RSjw;qo=-;uDVcod2S~{9cb&2hV)x3U-tI zX(m^#)BA>Drysn_z3PxYr9V$^J_kZU3_z@37Q(OLkg33A#CAhap zmhcHjFL?q{pVDQNAn`$yokppW|$bFuH~56p`p6G|+vbm{TI z$VbBWB+E=ZTPMRVr=PQG{U%fM=g;Y!RLxfLZN7d1@iFM$F!&y1h=Eq|=7m+QqmK)J zdc}en*cMV(zmJS$_C{Fpc}-Rb?QBDB$wM717Ti(pZv4R3Q;;f3#Vwaq{x4@Y{O# z?seY5xp*dJAofb$Llvfdt(+ZQB16k$^QC4g+WX?WkXFZ~8;57iauLlWnKi^{Ds@on z!He8yp zTzQ&Z)Ex$Mkqyb6!BnkF(n>^L)w`x>z7~;X2osv{bJ2@@%!D2m;>^3s8W$uzP=@i- 
z_@+>Y^J~vQy*vJNHz#_^C7sYfE^8sheD>P@6>lzO&j^R@bUtCcEp8q=HSYUZDk4ZBk&$%ZP?@S`Nm2uoO6KF6%p29 zN9Dz|x${K72`@XMDdrC{v2nk7U#PoPir1?2IoC=qeg@$WHldr>p4A5J8AqDT-ll!C z>x;&zy<(Dq|BFZG2V+OAS9|F zUsi~gIo-{8wJ6ga)z3GtNKGt4qao6Xj=9H@;+S~(3Avn~O2cv~``&}au|545J_4fI z%Q4a&WT<;OP8wrayT3gm-onn7@KIzHN!;?bWEs+OTjW3#U&z2y9;N}++`z09Ux=di zgpuj9%Uz~2s_QNuv?4Ud8UpxF1}dHtQT8Wdm0!9@Rv{EQwB*v&{9(yI#rllzm?7drT}&nf{vvu;j>+|Fb3FW^STno#Mx0MPs@6Z3-%S<&l*@s|>~W zxS0Ej%j_5Ta-Cm3Wt(^tEy?x*|Hlb<&9XHjusnsnLf(B^^L}cVGA(_$t1l-K?l#>} zlHzBu60^QqM0vj`RL^3h`w@m{r9fxmej+piA(R+7sTJWFL-lyEVwnNL37K;+7Hs_S69XwK@zB_v% zq(IriK&+glYpNwo{n4}vp{*q=9ucDu1CG1~8S@*Sa-zS5604!`7g4vL57fp=5HW~( z$%e11e7nhD8%nU=)QLYr#L#8lSq35gpFsl_Q@<~xB7prYji(2XpivC8pPeNOEZGqc z5)JM#(Z{~n#^Q_p$oUnE)jhhNP;AkMffMr?%KlDAm#}%dYpHM|7z!4N`BvwcUWXU) zV^Gl1 z)0}R`;Gi~|R(h{D+lhm87}V3K;2OxFp*_du8C~8Ax-9@hZzFv8{oC&K)(g*FmE&S< zpIqC<7e1729juX-h4v}+^esBR>+X>UDUoK2+51mRnLBf9cT{3@-i0N`n8qwQK4CH? zD2;adVjnV1#8$T<`HtkimG`H*!MO*Ux6nRp2B@D=zhle~(1fHoI_3^tA<@f99x0<5 z%7*B(;q)oC(Ji&~e@ap!s}q?o)5lqOw=Z(4h_Pm&B~RWH>y~0q^pfe}o@t+^P7o)a zLA?(*k9+o|cIy?cAxB3HZGyN?okdluq6My$>Ud@4;BCwq=NC8!h-GKOoCa#12wjlY1LycBaUQV>|wEff!=Y7 zq|Q?F?46-OPW^M(x^FjP^wsxeV)xn;R9qhHU+~|1T#tWq)V6l<)!>O!9MxjuZif=h zo0RvW8I>F+Z^t$om?xAcq+jW?%FD~$HZe_<+Y;b@QLCMxYA)cw8AiBrzV}n*zxxF( zR}!v{@z(@QsbW*ES;@9xb`6xx@0R}x#>OUa=@qN zP0PpqtMn0(yPUL{4}n} zh|jy?8#?n`xp$4Xfl_o>$tfT7rJ%D_>%4VUC{g1Dm{0gVJ#I_iWtIoDlfSm)t^gx= zkD*r}GerJK0fZ`G?L{|kg(AQGon!1MuwgNNoIHK3pksWi>8leb$Baw1s?IV1%qED1 z;PwbuTSmfuVLQ;5){CgogCTf;Gm(@u|BQ1XO)w?#>)nk}QI$>5Z%rfJi<%ez*eVIT zcQ=K)-_h@xNQpwcTcyAdl{^~%&aFWPE(6DcFE8ssTE8HBzU|{gI5YUy9bb3^oR}cV zpycmfRR>Xh-qXxr_Ce-`BMF; zK_-4|b(m(P8$-AG`ROatTtr?Cqf>9Z)8s}7q9mj$0tb3{2-%kSuU0{JsGJX9=hPnv zT~of0n#|zKt($OVpP_YNd%rxf1G{``e0cw?$yR2f&zBB zK(IIAd>HOTl;hwDi85RhMB-9iIv|yC7Ys8Edk~o!k;l}`;c;*ssZCc=de2vLU53!5 z@N-X!MBOJU3{FozDZoriS^D45>_zn8zkmHuZ%0O-5T8|2D6&B8%+U3OU)8CogQLpJ zgTohNR#c&+`~q36EnsUx0Oz~?S%bW%rb`iWNHPfg^%cpP@WXV-90e!#LzoXfW$w&G zqtODhc99fQ6I1huoVPllpT|~{cxPac_5rxnTT3JajrRocTZ8b|5d^!obakq_e9pAs zRX!ugvE>fP6*|-=GcSPdMMaQWMw^vlZ*&6GOTa7TP?d2f$UQ-Hr!KZQE#Vyqi2&w# zm&nLdaSGf3id@(D=8#}zSJS|J6<*dWXc@OiIf!v|H6x+;yir~oOA%cI_er4&j8dG2 z0;OM(2)L7PPwzV}T|fOv8(07>@> z<8o`TCX1c;i8p1Q2&<>#z7i|ExS9=0c=L`?YEu!Wx;-$_?SMz#FUpL!ye92@S&oInD3}0c4KQs&$ottpR`E52_#G zCbKzaAEvcr_brw;h&MS;^P6fpEF&@&M+648!Nm)&A_s>s*iU7>Owo}RP^t3B8}6Lz zoj42LY1g)%jndj!lNdZk&;7~Tcnujqt6r^NB+*Z!3( zKn1S03D5c8QJYWS!+bm*PJtj$DA;2EruIbJq)_$QX|%w5O$O6C<1MpVHM^5Ewecz9 z4x%DPEzDBka;BMcR$D=L&>~(@NLLva=X*5BGUj3BLDTgk+At~H_}iCxFycd^i!Nj0 z^Y798NyVd&Z9#&}a%&5&ngb2L@f5fYLj)^J*Fk5QNFDk)<}miO)+-_}Tu)Wi#bI_n z@~7>q3BBseA?6!`#@1=l78PTiJY+=H%^U?Yj>uYGqr#FI*nlM0!!Y+3>B5rj7e{l` zlLNyX-e0(X65_~%`)X>!uJ^Y{*C&t=HbA1Wp7>jpv;;0^?mYhK-Z4#lDbqk7*6Q}s zdpIrREByk+O`T+pkqNQgsd+BEC#flOL{HgLN%bx`Iim+rHX76KK@4)uuo6wA?Q6P_ zni~4#pHzYg7iJ$$fiJH;F_b0-Jj}8B6VJw2vQ-t&(?{<;wy=!3N^z0(?T*e^M_N&L zvsiT!%XI)s#l3Pz-}fyP4MdxUa+W*xtt$#p#5qGRi1Z8|a=OpICdl}S#hjM zP(-yTrwODgo3Gi}H_^-Z@esz?1WH;%vo}QvW#R|^zNlp+kx*VqC!dGU5UHUpKMNJ{ z0Lf=>ypBG{Cad3NYNfMFV=5XElY2>xgfIBE@4_{DWNS*5OKSdUGvN}?$1Ad({c0m(EV5i zClkf`?y9>W=!BJ_9Ybw=rL{}s2lT;9Ttkh2f{7%$n{er?S?zRp`*du;Q*=}KzRR6vK$FtQEhPU`AVMKBXAw{CiFv`S8N7yM6*9p0s>6Q{R!RGj9~SOk-sfTT!o^7V+E8RMdeH9Y=j zrE;%$|K2G;XAU&>NswM&7#F)C9*;avh11nP>79B18Svm*VaWPc3<7+JkkFnzai}N$ zUxcG5^wmlFvg_2jvMNS^FZlj-*4dMIESLVSPOsbKX6YX#0tH5jBlCs>&55q*(Fqf)^$6KbE}6dGt!R#^#NM&_jpqP zEETtWuxfnI_y0gE*jiW5sb!}p7yU25=IcYx*YHAK*m_Oh$zQc1?u+im*#ACHh!1hU zB09h^z_2hXA>{?k2GHhzk4#@52k?M}lI453k#|7V%r?b!{`-F{AsLYGriF3&UuOx| z%36%iffP=$>;E~$3xTUCwq23_b5b|`!1XsNom<(=(@@#z@HcOO(nSVGCkI$ffP{4e 
z(WDLX6)EMzb|>g(r+=2eJ7k>Z&W;=gqe*==Fpn**a6C0S06kyu1Be+p$D;sK&BL40 zs4qyk8AFHoMRmqG=gdGYw_9M>+epT`I==Wi>=%?2s;)lWf%%D9eJTX28o>cQy>B+X zVPj7~iG-dq%Fb`MGRp@YA-m80h5(Mt?>C)C$8*xPAI?3-%}G?8&6x2^c=nnSAD8}V z-SjRfx@TV9zfYz_i396uIJ4t_E=4v~Kj$r}uh&}65IE@BKplhuklgoQu4|V!#C0%! zU(?LxAj)q$E+`Zt$yzw~ z>>lD_|~yr?Oihc`!e;1Dhgc+ zD(Aj(j|WbWZXLcya&DQ@g$n1x-H)q^&u(41xqtT!)Vt4|BlYmCHUBXh*g#2X`tOBv z&}E?&Q(S-PdycZdpaTZY)qzaS`PDr}m zrjw}->i_-ogp%Sy0MnSSz`=jKy+&6gOrYwE`xvV7EX~NEdB~wa3hpOV zpEK+q{uz1iQ*S7yugD_h{8fJgtg%L+(Nkfee~&BSk5UY_X$ zypR*KeB2;-b7EVAh{_us#c?hGLgA%13d4%&_ZYr^p1Nq^U!;W-C_{1+ZplE8$G@G5 zEyU`^k*;3OD!wj6=K*XcQ~OE4m1Wb%6rf{h`7vX}$f-%tPgW=-2OlMCMI|Ig`#|w- zdtCV4Z186zrMmF+M!0Lxs{)!3fz{yb2_nER@x?pPl6wK-@XS+thVd=rA#C4=M`fWaL6K(dzZX3AVls{KfR zwXjT5Q4ix9yc>7n#U}akSlMi9A_Z8?65y(Boz+R>^lZ+tb=)oHuM_LDzPs``arV!E zFs;w0a(M>pc5z+y_MF%T;_&Lt{d>TOF3lCd=)6w8Q>rWC2Dey{}v_@qsWDQDgCw9a8^c6;SEtlFhx@j^Xqr|p3iHFK7 zLEhRhn4PS}=hI~S*GdZl*N2aNPM?I|b~=HjA>av&oEB+WQ)r7@3vC~E?{Oy6V(Yx^W4uZH7AH`mPi5KI_^X>#QY-`uOB_S)IYshF2(q}o)R~N^_{xsqi z+cohaVR$?zf<*e*{URYG~!IjNB#7U(Y0SSt*>cx^07W)Es$zK-eRg}m99xntFjSz9nvw*l}0my1)wqN5&8fCZAO zOyC}?DbtspkX8Wrfcy7>H76!{BcmcxdfD(2dOor5;RjUINs)|gR9tG;zMd>pZudh_ zQboz()GTNYlOANC(}&Pbf+hu!Qu;&QM$wzRD$o2!dms z98l17q3RI62kx{B-NWYdwBqm4M1tZlaKr|S$PyXKGHj1Q4T)Xi;ymB#WuLQkjRT&a zfxBF)pHsfL3gH9}Rtuah5WX&$WLbuZzHru*?)ZWvYv*zE`e*Jd#^^J zY=Lk^mHWe4A{gRqY8H>qEQ$r{V_bp`Hsv;!y03EOyr{K+A}QQjVb-E7_p~##LHAJ= z=&yUH*I=-d0DCosVUS?~In~*Rku}kRZ6PWLw)tp<^dI=nY?OjOm+m?Z^Fl^x`9E~b z5;BN;0P#?kroZ3s@diV!;C6R3McQ&qblhGym7YVMLdh^nt{*Ag43`P-kvHHjy@LrC z^AMzRf$o_%m*~u;5+Zm?ZWP}fGs8VBoHm6RnJr7Z(MSrGpM(vZ>WsX@N^S4~sg38y zdpowz?T(>s-BH!gmvW#Y-#hnOoD#2jfGf8RC^F+ls-!)EeMZzq6qo&nte{t-DG_te zN~iuq4QX%>{OHRjAMTys2ak7fMGk1qvVsHb^7LTdyc%^qQDm;j_Cs&RjF=v2AonTe zX0o;3)DSn<3>g0@zydJD>=(&6P2dWd$mU^wgfJGb(0(BxgIhr$dAy*Pf&urDA5 zI!WMWw4$YH(sAcV8KZN_p0V6fiC`}rs!9F*^IKPjCW+3?K_9(3&&OBOBjzrBQ?n@g z3rNzaj*OJaxvbh*H3{DrLWqH%x#!)5Ym3=~heea(WEUyVCXs%6qH00mcRoQrZYpvu ztTw<2yU;^!ZY>^vv4{WRROR5^B^ifR78%(il9)zi)(`(t9 z&aG`imD_hTC7%$j8>xzlR+d1N1E-~Rx*`*<)MHT@>(4InsvODTir2WsevOQ~W;OAL zzjX$FM0$RuUuMG0qjRt)Oe-OMHT;H|IrVvU=?%VHvCoq-QcQA>8DP+Y(=dxal3k-sBAerC(Pnf?0K ziqqqDPJpd}N%?|M3jUB6Txafi4`=!}7mNP0xn~4Fov4s@b{lX(B%C_PUmspQ;JIjEZdZkZiHrdKgaU!4rkCicC7*X$~TRqc9;a2BluszN$S-CBFg5Dmm z4&X8Y6$h_ysni9b7>5boisJo$LsXb2By>&Ir2JI&v4>&4f4mAa*6YWUW(9)W;%d;+B5dUrmCjbadA9 zEbQqwp3a+PT@wt&4If?nvnDoQPZQj~xDWWu*%67QVR#c2zqDcBP(of8voP#YD0X6_ z_D2p^pCA|kOnw7c_gJlU_{XpPR~-<(0|K)@3nmcIMQj;fOf@hl6OgnWWAq#YmzR-` z5WsqazZr^3LE19`{ffT8A3c*;iiWUR{9^1)JYrg8YIzch!q<;lL&a0+1-S7GH@~97 z0o0}p>Vka2=xmrh>XeU1%EQ9x+v9$4k};d`nh0aT8U00&^IWzLd0pP2YawLRf*XGY z8l|3#D@}9&8JPci=#9a$5gxwcxrs@RKsGr=0J)4aRla&ce-32+0>%`SPx$2OVcwZ6 z%1JML)e=SpVu%=^&>bvOKI!iXa6G`zmMVo31!d3>-W71zLOz<);B1+j2vt`*32^IB zlLn7L+}(m_DU>7w$#MEWf~lGQ68#90R%Ws5I-p{1{aeF;BxHS7D`(1d~n*D5nG(!$=-2nIo z@1*I_JPLH?#Ds1n$OsXzrJY#=flz;d_V8_*TwT)@x$JzsT+c}uFCU47`mGHrN$9KI z)+bVA9t9|niK%)1ltO~qQAa`28^mq6BFHSZx$aKZCWxe>F`KTRdRr*Orcn|~P_(@N zUa_R7T$>g13EWpcEA^EJ<(^h(6#d+N-7lc}EfsNB56#G|=&C?tUxZTE@g@qYm|Gy2 z^n+D|LhNy-*}3!Q%?aP8jHC}V@r=A?PW35N=FEL$1CDl|mx5*4_@Rmq@)=TFzL2VU zSCBrI)jhw+pl5|ybb^D8Ol--EMHufFQK?RFSD-TEsMaeyAqluHuV*D zkB~jcvwB>Rq1641nlarX-rfri1fOgGoO~;EXCD#GhLBfzDlL}4xhe| zYf{(*YcEuxcN$2fSAbD)4=u@=qZdfJ&_9f&pMWl9I$8XBPd;iLU*LnJh}P}8WAdJM zO0@S9uit|vSNhqyo@s#^*#^VpJ%PvZm>|;MTiqz}hnK4|qap?r4bNXjHNf<0?Txw^X2Zx(ANM>E3K2o==p^fIphD$)t>8F{WXQO;b zP{ro2?YIClEZ*sa(mpsvhIu#=hRV}H*(pe)GjeZd@`JHt8|AieZ!KGEmc;c%g=-}B3G78kxEm~)V%FB)15J9_EQ|WGcVWT zyo0Qh0kS46l_0G4`@&=*WltpF#Kp$pZ?+}otz9H+79Y>IIM-a)oFYsiUvO`v(1(q^G#jOh$bxD4+gU%`>ip_Xn!-g&MC 
z;~*-ED?C!cGHLn-*YMPJyT4usqK5ij^UPNittF8N)sAAFcNB(_ButHw%9&_zG2!0jVc(=OP=jFh+__zdPGcTs7R zge7DpnoYX66Y?AowgF=^V7K{qgBP4yAPHF@4`Ww0R61fQ16oKLfRsFLptNOEN zV|QXxd0Ijo@lIify#C&I>-bTk0V>%$LE_YGS@#}f9MQk9i%r@5vHGNIwm!_gyDe7S z3%d#H_2TV{kU5w63C%kEU;PKI-m~cNMkmN14!8_F;n?>psrl&fj+M&35pQH4A zQ#fW4MHu97cs{QYqj#ZQ@yxZu{Y)HmN%%I;NJ1B55tq|Jo9LEI_vDZhtDm>Xvbn{z zx5~aL04DiYceyXR7u4MK#y_11$lcLn*uoL-xk%k{u1`dIl!g@hm z9(Clqw22Xn!OS-mXlAlH`MzSvy3={09lT_5R&agD&3XyxqsrUdm7iwXw9WO4>G^KQ zcR!g^*v%WcIevZB*RVF8S+~8w+gHaYM48z98z(1W2!;GCyRQjr+?Fp(VKCj@G!Ne` z@?o`{Yh?B0J*lm~N?-f#iqSK)b^A-7e>dtD?nfxC5B0{2RkU7Ry<>Rw7 z;!Ai@TZj$us`7%etny;QMob}*5+BdN+2HO|u^CaB6HxJud5WiV#WY&R2u9#9+dqq>Pw>OOOD>AhbvxeA9p)XkYiu*ZekbA< zS3lcrT6b3>i+#ddz2z8DOess--Aq2JtPpKZz?zz%_l%ClDAq%JS79^X zm2UB{u+kdcY3rvg9bMhZZPJl+MrK}&t2OV~yiDKbZs#sFd~g@gND5-M7@qKZgxe#M z|DoFC#j~;7;tM%l_JiC*?4um&RkD^8-l27@{h`OnP9L){Z8|N-t$uY@$d&%HFV|aD ztu0hn9T`)ut2d?a)d@5LV=6eXC@!ztN@|5TglrJnvbIEm0;y)NU2rsPLJ*6sogjpY ziB*Aevdrfk4W%{BrD84lc;@i~(X;t74?fdhR?l5&TNL49Jc7qTXJrw8RRuTuWU44_ zf#xxD#G_Ft(0N^Oni9lwSm0gQ4k@mbyCv5|z>;QAJ65T#pu^&EMBIADbdkr$uqRR| zd0T!u@OJViR>f8q-<4;n-83=D6iZa@geUk59Fy#PJUShsK9M>1o{ba0J0f1)pBp#z z(AU}mpJgU$!L02@??LRfo=qBDx{{>h{H%n7S{k9LrS|e|;wdPX#zjdV6O%n7p2_7U z-B8`PTH|Nb^Qhj~%4ZLe3S57NpOi>QU>yC*)Hm&CA~vtK6^?AL&gsOVN$99UeS}F< z9;dKMje`Ek){d44wvptG_&C28B+5LaEOG}u5wvo?Rg=uzv@xLr53f<=jjCAPku8c0 zr`z%7I(KDNaVOd8r73-2c*?fa6!Umi^634KMvf!Y1muDP<1B%cPpPm&r!4NCCzf*i z)-5cJgC*#usAu%NnXtQYxH&WJ=V|&+l=S_43elV^mmj&en^4AlFDmHP3i*W+)ggjq zpdfh#qfFd(d%b9G#I4Rp6fT%N$x7N|TPD?hy)fWP#=4lFxk991DX2$r;!+(!VGo3; zm~yY?K+c4VdE7UjnEJ*U>ltX^Mhnnm{d{!<@yi&wNFOE9O#3UiB^wpIcsJ~l-(%+;(&I4mIogSp>1gw=@p<5%1>{#RU#PSXhErpoY+n2*T+IKVX&GJ7;jo}u=7wiqEX z5W?c;YE;sXE!Sx_id48GVOhiBq+L zIOd}=B0@J4SkxxYBwj7KB(Z>Nq3k2M6fNUU@ojv%^~TPG-r(KzWk<3QJQ6wT8e;M= z65)>HLy0QXN_xWW!hQuLQWDoLN^-o6_QlF)6ceP!x<1}+C+)Gepg_cN;ac8T_7?=& z`NsMNgB2Z9-{W5~wC}yG{&qurMl_p0e}#IZovyyBYr?|70$0d9qV1?q2&1iK__sp5 z#-TTdZi1X!?LnV_sU9QYFQ-Vwhlg1-NiNU9wc5hjcw_#VE# z!0t{JfxMP6FSdQCcbT5=opO9{yj}lcH3^3AoRi^C7-@H@W1LE=O#Lt`Ukr5z zPtM)pB?+Or$hAj8;Tus>H9RZlw3suNf381EJARRRd9Z)iz&tHCie0&+XSi?OC0wIC z(?BC&)hXqb;>u&shU7IIHU8j`FQ(7N>DXU5vUaK_kFOeD5gFzS0;KL-+O!M6(Jzfz zrp#znbt>32*RK8&f>#WebwM&1IcrFcv*Gn{ro7|m*=J5+XyJ8J z4vOHmS808Arq$N^F3#NdSf5Jz_4S|SM+72T*i@P1=hw^}69{oj&TjKid*8zaG^6rf z+Ru0P&v-_#xpdT8{BkfQ^-oT`fIkT_c;hv9?wd8mA#A0R5XiE{-Cf5+U?rCm>w8Jq zK0NuiwkhT}JXf-9mS+4H2(fj8$jpyva$jsqYt zfOtc;6aj%Bm8brJ)yA*(8wj+%#C!gaH&A@1=|72u!|p$lY;%6YnD8}q_}WCb!A3gB zsB_+&QB2iCuwhODTr=k3Mmw7p8T`_YRxQ|cGY8=yg!Am}5-iR@jYG^6(e1qGMPbZx zl~H%?-;rXWTRQroT_awGiW~-od+Z4Zy4eq?N75;t0@WN9fE_Wbc2gKMIb!I#-?xeu zz{mOD88`)Jn)?r(+ARJC{l6d5BS_f6uHY*BSU+5#VY{nw4WLvHD%#5C43{-XND<{)^f66n#~Pg!g|dc{WQ_RxD}2j%CH2&|tZ6(1{uQfTx?z?8G5N4Id6h03NHRucCBp!r@RY{O60-dKCLEC)l+7VEj=5 z(N2h`2qFd3_;bA$dANiMGOaE_q+uF|*e_9#r2KsCEguFDpdo+{&@3Y&!Y`PlPNrMy z+rXXQn3AyQLFUDzXKtjT);N_lLp*>UDTNz7MLGED=kjm8gQ7&40x*K6W7!9-GGG9k5PrsmMthmpAI|95XSA^R$zG8b z$e|udWIKKUVDb%vx);o(R3E(?;W-PunlzaFgUwxYEisAa=*Z^l`PioO=pk(=P-^K zTjBFkTNx#bPK+Wp#E;;MQjr2+k79+sxj4~s4HpO4+z)^-{eowP)G?_|2o!bQ1Lbfz zfJ2KCOyE?US`vBW0=Oh6i|#=IC#s``iq=iI?G7s{+iplrH5Z}$NV|qRMR%jU+045T z%|Id*oKI5a+gA`rKC0jC4b`qHqL?@(rpMSMBL&Fu7wTC7c4ZpnVd@9M`3)F;k~g#i zZwpG5A283&P%jKHHXc+y4}hYPR(u0$i&2md{96;ucsmRvX&->!#HEHXY63~%**`o4 zI5I20cC%#-wSWMPeD3YomjE@30LFQ*9h}TyafGkDc!1o72R|S+A)u+JU7iEx9*c=v z_VLBL@@Gv>kPiZ_&dZkk>MPvzNoC594p4KPGl~k7sh6U4Zo-3^ssopzDtnOfO~ZVF z?H2Vg%|iYKNjroSi0AeQ-j;;tz?c7=>jw0F{Q7YsY63iGV|;P|+;)b_=CXZWd~Eep zsJwBsYU@20N)gqt@TmvytXXFuQd<8>te86Qa-?(BWzq_WVvc!(WK7XFT;bQBZBk4H z!%Wt*g=Sr;`! 
z7#)GD6$9U|18M1DmOIB5WTM?$v~Y2NoH!zX(f$b(1K#0|GNp|lq!`fmfdX*W^s^CQ zNj;hy&O~p|cX`YpUpemGYl&xh)z51VFbhS~cxd4|1KR=$ikxk73cXMlPxC^6QNKVa+=|VkX=N$E%0{JDsiIrIb*AJ+K9ArooYcFB5 zgp7~^fzDFxDM4Nd-b$ogB2s0Le(Yh(hd{Fuaj-w5DB6XxNsEWWkB?7?GV`~(h#d&W z9q!+UD)7ok9@;C0;((<0Yalp387>TdYE?sl0Z#hc8i4Tfm$HB4$qrfpMaNB`bj|~! zytKL${etj*hfiF(|96n}BVQ&rdgw})IrRt>821rQ=NTEupM=>zfITp9)DK{BA#ql} z2H&8g7&F>l_z+2d1ZoCZ8P;sxn;jfJs+=k~5J-!0Z5EDY0&*;KUBb|Ho1gy#{!H@Ajw3+q8ou}Y**LHzhI+?A^^cMP~ zd=Ju)5qy5M2f?l50EEjb{Au7N?H@4j%G^w@&^O>pox z+c3NSko~?vaC3c?Gx@=_G{#4%&Y42LUMlVVFa{Y7IvxxvhK|0O5ybgmCMx=oPS<~s zC48RVW{UYDy@oozab-zq0-w6k6aVIBs1Z{h(y&d)F5lX|wQhu*kba&=T37)b~Y@Hm0a7 zy*)tLjj1zLzUClLtu55KHTrm^pGhf?{rouvLk&=@b-AdhGgZ8;8vc>zw8*xc)Tw4* z9jaQx0x-HrzQbWmo77xzw1ViZC@uffJuca2$&_(nfD(@X{ zS7APnl$(r3j%e1Q3d~xsz^D8g4mY{Vo6QWlj54{ySfGS?Ea{6)C_#M5hpemVAu=f; zl|zvf0Umk_$BYsA+~?Qe9U zk_FMl)9ow8CpCHhC6*=7J@nF-3V|7~{iQWMS`-CK_QEJ0onrZzIhwfMt)8$uV*Qg+h( zEIWJnF{AyhnX@pD$%va+BqMwKs`cmY@c{2L7c%|b?L0`!{iteO&~F%~z)}=sA2PK=|VJ5m~EfQEZ zVoAf$VEWIu96&QrLYH>*?_`77E6m`8)}J%@aPrdN=54GnhmZLG$6%HwuH}8ifxbnh z=PIFL&Y!!LDWw?HvWa0l-!Ne}eUgAWInYH+@q%yzw2ZNH#5qUudVlM+@85~3IR%ts zY`NH>&K@n7NthRB|Gf?90QdWX(-5Cq4@{>2y*^w6!g#44UM7FDh;>-wHI|Og{@q_< zF#S+*eEv^|!gv`kxaf3sz0KcWok@qeE+^~A`m=k`t{*B}pECS?p1|X9hAv?fli_nC ze+LGYPu6-M^Zi%b(EyLsVXepI{ojEN#zNub`G(0q`9{2q5tWur{mw$;m%jtEg3h>q zj}~-uIpwf&X8t=J=urD#PY10)YYB9ky>Gno@61ESrOC{jw(~LIHZrS$61#y|Mk7~3 z@k=-w8iD%ha6;@^ym)zRydRz}zbZ3Tniz;JP-G7ojvHt)j1tqc5@p8mUCZ72+}^nD z(3Cr)R*CU4uPFx=Y*6GdyA5h_f=TXzR5?ez*V!gsZ9=$JmZ7|{0Tu22>n|U3pRZQJ z!ou-TRgl#=JOgA>aQ`8GaM2hK)CVqzA>Ql;L3r(~^{HXFcPLZ?88}j*Hj!!@@^O|u zTRdDe@tIhkI@1REJYPcs5!<<#(h{cj*4Z&$W{qOrP|0|^C!MBZZE`LD0?J3BkxotG z)n|%blZQYAn$@6}1kAP3_cgLgX#Vq=G^{#Fvi}@K9I!jm%<3=y-5pTI`i2RG_DYp=cbTyxF1<~4V?vf>kT6cQ9TI5>0}X$e(0I0R}qILHbT z1bk9daf%H7fOl1WA_iABdVdrA1KCMh#}y6^6&LnDJlyLHA~4{Kjk>m*wu1a4Ge-v& z6LUvX3l=X2ComcgPSEQS_^X43n+c_tgZ)$2M_xkIw<8{bzr#LerKY?c;$|mAt*xL; zDemZELCMX+#ll7{j6z9CDd=Ku`AAhl>d)oiUqaN@Zf;JGSXn(iJy|?CSsY!gSlRjc z`B~XGSUEVD!3bto@274iUd&HjX?{=gcOD4~S2GtICpR0%r?~}o|K}CR6oiN9rtN-T_|L0svE;bh6sc)}l|8wxftSmcjsQTe2x%bbEv7ZZR4f`|e zqr-quUwP1NrGs8^eXEDIx!UZ0P*EXA3KN?5ATVO*K-H!u0Se$un8j3^##NCQdnv-7X7XqrpgSI_}3DB_8JOQ^JGi)>ijnIQgR6zZNC&1 zyz%=np{BHEe|5HNb+HJ>PKL zLufSIzStRzdJmJ_UJ|PNVdJomqHV8nyTT=!LWDeU5}kJ*tHv$LkV&h|ey*0;>axjl zz@?9Nd2XGMV;+bK+urw>HX-5V;`qS?p^mkHSXAl1DZ&K` z6;Oz%S)yI(b+|HHW9KvG5~eU@dN6sEHS4{e$)CY(mps7WX}dSyP~LX)C^X%nHchfj zukPv3AE%4Y&kb6ho$7Pk0ZJf2iHb;DT?MD30kru#Ml^-RK=@)Qu)OuhU@n#@L7{f# z%J+1~(TB^w;-6L_hk`}b5J{w-CpPG5l$!JQdKv4?vIv; z>$m!byFY{-9b!757b?w+`iK(kO8(w2p*8Kgnihlh*FP`8#LALrc)1ZY2Cu0^;=#; zlRta;bH)6(ybo~O-G73|Zv1&RSE(3H%8OAGw6DH7o+lG3CmQMN63x4QJkX(QaHjtPd={?ryci_ zI$o^&eYA>Cg`+M5#HHfFw+BC^eJ#aE*lk0Zlx5!P&2w$voTpzKll>V2`k@o}p(V>V z?Fg7$4zr1D>AUi8gfrL3L(oZkhaZ<15gK}#wE6uUru~kxKUqquS$uiCp2DOxDv4vv z`DW`&@!N4C)stQ(C`_PfH2!gh989iW8dcz&$U{m-*<8QNzr%d%HyS>ecLPq|Cz(0> z>gY)40B}-q@q0?d(SPwjKmq$3l;Cf!$yXk!OZV5P#hxR^>Ku&>UKJN&{4;0&?J(Nv zqZ^zegn!0xnU*pQe{?%nGyJbP`z8dAQ*&F;*541pMESrW2tIBs)cALqw-PWq`>Z*S z{@GP5MPL?}n^|m={u)%#1>D%-!7I7{yQKdsrT;(aQUC|>*5mi8d9VN=r&FT@=xa?VfE)0Vw!->TE-cMdQ7QA}g#y%lDV3N`CF8@Do+``WUmWj)!GV*M1S^p%tCw`g zd|ypP<=?AYZ5iGAoT}lzZ3yhBiNd}%2eVWGyG%b*uFef}T>eOiz~Q6#zg7EpW~pNZ z!O8wM!%*<|g#I0*c2VGWu^gIW_>3XkPcefpt|yHEsvRp~XaW2-Eb0lMrlL&Kpe20t zjJGd_;>BLWx|B$P_U%`&Zzq74&+Aoc_$|6ycXSuripUs+=|>;M3&F!~++y|js=zer zJ#Fa8f0yYp$l$r)O&`h3Z9nbQA4@g4icKTM^>n`8v?s!SYl4{1DLcKn)@^fqG*e*z z!)HZ3hng=sR+*Azoz(9Z{unsXAHXW*y|I~=y}n$3L&{|>S{#BwHe2g32aj-9-+L|T zKDqa|I1o!%HMp!P@+~7my62ty$ksDG7jCZhMSL#~fOkIKnW=VMZ0)YD@Je`~@IXHG 
zV~fvu(_W+9LbLbRMt2%RihmCY2O+IXbtj1QcSFw9tu*aP)_a7?GQ4sk;W(g(`1be{3;f zEI!+29yZ9Y;#MDz3N)F++|lnJ2@oLQRJVPA^;)s#scMKb45_q9l)I0Uza_h&qvTyrSOzffBO=VP`BZNke46Jw&3GuF0i zocHDwg2l1=7nYYl!u2rO!3Mb;P8s?z3u{NQsY1kG86ptv9k8VRb%K--6`&E4J*vua z@59-CxLzicwjc58k}Ed9PQ;lG&YKV?V>8U;ceKO|9IpC7D_8Y~#~vH1C3O z#vzDSh(X-Izn&4OygiE7p z^$1U~L6gTsqMl=m`-InRYfjpQ!?T>D&DJn-wqE_9hC~9%*4SzgJ+&WzevO9Bqr8JSS!STOgEaB0llox0yp6@ERS=E0 z)?Cx%tIk$5Y=yLnlxVC!!|QbUB`6;R^h6R4y~a;?rTAM8<_V%+*TK~@c3(8w5kG3% zO_m^cMKjxJ!Y^8Gz5erjcr@_&Zb9$n;&rGRuzs83+lA#|#||@oU@E%#Jr(#J6a?Zm zX_lbZRDb2CYz_1WWjCTo0u~lO9yQ1E-(`3tz=n!7e?2!yB3f%^7*fy=fAdN#lWaETIB-8OnHLxDoJBphyJ(e+3iRbEhoUn>9qP@ za#(N_27gP}Fs_Skyww3>Dy$F#FV1mxeBBbns_K>x=9-lYMq72af)x{;ERnEga3II( zA3Yn7*YS2@Dh!9MB-RyTG9Apjjy2&gMje~En@3lTvYDk~^yg5CBCh=Tbfu97spJ1lMVP2Qkl#w@JBfedqmG4TffC^iEq{gOkJK$m87Sw{qwxZpcDyl8 z(kYH}T$1@99lL~49VWgiX%NGbR_wR^y0KS(`v@hh9augi(H0dg*n+a9k*z;gNV7IK zGDEy&je$kBmpVHh6Ir*p`eF}!UM@B8i0UbNng4)+esZvaok~pw>R4O|NGPv?-nppO zFv11|P}76+GDPYhx~L@bGtm@vTva;4LHW!H=i*y}(&(e@k_ddJ1(zY_WqXsY(jOp8 zRRUglTG=CknwIS^9VBvkPBJ6exo1!#ori3(HW~L|w04KcJZcZW~x@`tvGfSdjCto{Jz6 zeAbCfpU7(DH~voM=hb-|NIbnQVwx%xq?>=;TpgcLyh!HCmoQVv;B#JT53qq&Pe=UN zsUCZFwsSdaSDwUTFe{+kFLgrk{Nxkx&r_v(o*?ECfCBFTi96$8jpEe57CN5?zK&!? z=EVs2RJ|8UotGjyDbH|^E9oz=^|TKb>?m?sR!Xl~TQ55E`kKFuf94@HDpF_IGru{? zp(dnYYA5Y!z1o8z0J<-MyN20ZE*Frsd!K&MDsMT{wYwgme{&611u2k>B-n2`Cr8oG%3&c-$ckJ9+GWrT zJ5@F?UG#W&NYGy-E$DX!?(fAZA-9gwfyHgsi{{s{`y|=49PBpClL0^W=}0nW?rGC5 z3jm=u*74FY5l01&<<&V*dZ(rxTQ5U=-h!yOzZ?W~p-)Qah=iVT!LM9WCg}^_CFeqn zdv{LIqn+O4I!GF0NJU3sB&fSc{ru26^F_kd=W?Sq^0Xc!;e{i!VC=-G2oU2GBpZj4 zd1QNU2nJ15Sqwp4&UR*KyQJ*2F@#P&0w3ua$y{swIUn7Kz*@V?KbP8W5uAgMiE+E{ z((}2W@4n%*Qk*y$e(iQ>m==sa-s5ATx!C&L;5v~(O~0$rV|Qz%KjqtV6AF)ed9JR1I`3$ zThVnt3$-_$hbWDtn>oxwIFTm>sMZdjg4Pu6VmlnMe z;PkKzNxtj|ym$O|vBk&JaFXFN)$EncbVbh420B!y3Q_Ie%cxNJv9FOVjn>Dxc z-y1K{sTTfq?OOERz3utc8CPo5JR87%y0(Mq?}x%O@DC|3agGUg>ABBTSug~#llb`L zKS`bUI^+Bpjfl2yWA0uZUFpr?%iwj);-;Zo1BQO|?JI1Q*|CJIS4-jc!5#UjkrXw5t3Hu=aXcV=9-{l%#$m*}r^LaG>4)5bVsM);l zUi~UYEB3+|zLtK6(111NgJMkK3x7iVJ?d66%VLB$Zx&M(@ss0tC!iDe@RB^WCyaqJ zP{zIxN&cl=Q&K_Ng~x`+HOVlgV6Pr^;S`2k;IcNTBz#HxoeqcXYO~^nNQNm3J`dxE z$jeAC)W=i~t(V`+xF8SlsgWu5*@O@K53|-PgN{+&GE28CwD_pKJueBaq&@7eGJQ|- zDbN9Kpl3A#($XbbsmkFi?m=$G6~eJe6TEafHo=CWqPrnz%j?MY4X+c`HS>!f@D65& zuMhU`;jy)oTNa$uNkF1d*p2({u?Ku`0QwcKbHoCFPE{6Ee**l?2uaLIUl&gtU@|(& zDKzNvt5rn|5mYFQ^aKSbsGnTuS^Z;iN2<7|)MZ=}XpDp56#y09%?Tx6`4m|oI7Yf- zrNT`xLu~HWAYFaF;KSKy)#I{pnh~88f6~wQS=*eJ@bwtJ7+TCPxELSX?##5!^Zv*B zBShw0%hnDb7rl_!T6_JE04982X{A}NYq!Jy3Z!CQ4x))hUdzGLyI4{@jnRQ3`D(3C z3bLLtzRqf>n?3{PT;MJ``-L;BLOKt+8KZ9Wt3F1?{nWw^U`ca85Reo&)IkG364Vw} z_q5k@p30I`A`1QT(BoB@$(IliZsSJ>D}|%OU7|cL;d^qQDb(CBFhO#KA;t&gupjDM zN)+stBz2d_>QKtXTH#1am(NzVm&?1~`%2OjgIL^+Zb|N2XC>sq=1{+T;2ikOUO`-@ zw3WzMxLPGCIGsNDrv1nF$hv*{fr~`HH~gkqgzZh>r}ka|mjZ;cQaxXa17nm{*j`tf z-H=Kf4Z_JhGk{ZENd2ye1%y-Y;{5NTe@5b5$R&EWbce(&o)^J!uGzMU2oXgI34?g2 zOD-i2LyA}U`aoQ2K2}Xo%r}@amB_y5M{s+R)axC;JS94~!YuO>@?Xnhvv8K5pAvKR-{i2GLx zmlyB|NBUNLGG%h_)sm{Lbg6{iQ4EEWn;4u3C#HHJEn0ry@k)d>23<4Vd{Bsqo2`$rD%QYb zCKyUoT%yfEJ>;?AZDqtvH1NoCFDel4l`@~?@~^!b=1Ux(bctwdvg-j~n;kY=*(@u0 z(i272CzPsTo%glG`{o77N6NK~8y8>}6Ee`ORGqBe!*)-l(TTKf(MVd0W zi$Y8-@s@2OEMZ?!U>8pZiN;P)UHgnATq?QJtS`6G{sB#^@`bSpvTLc;H;kUi0ws#U zCr!#^YfaTG{>o-GckZIFM^%1A5NEcO?@SgApn38N%1&3N^&vElUxC2~GCnE_)+vOTQeydA*wA ztl%P@ul`9kd_6x@r!%1FH+21!lkn7Z?9g*j*%E*w=6GJhs(ZZ7*bRTVYF`grt?xJ|xwO@?@?%V26d zA;<>BA%KEKoCs|htaw6jitihtE%H4mDRDToJ5NH<d7ZUwpb)I2qMF-zq& zS45xYXAGz07<#&30@2Qb#>jZ7`Q)ZcPE+nT%ypqYI=@qbZ)l!ZV z;Ve<*;nHqK-Vx|jS3M71GQ}^J4Z>idQ5nl^zo=J(5BB=GVekV7K{$F%fr7H4c>w9& 
zJ2bvErfv*`SzPWG>OVLcWf0=Gbo|c9+z0=_cRUEZPPNMmd;k|l^EOUtCtdM4R3WqI zIFt0QC=rMFy+{9(@x*e2lzc`s`i%llkqX(L55bmE$0DTlybLJ0?p_&s?_^vGKkunM zOPM5Am5|~fRHYUNtHk|I?2+&T?q66|dcbYd=Sc_jpiW0Yd&CkA{*%vhJfWQ9h)iOB z128l-MSi*@mgk2(geC6Y0M*D-4LkY`Gv4OBQpo0dmX!qj#g8g;JsDd}kF}EBj#y&S zWNijPxeKIzl#usW#!3|cGs2|l47f+$|ASlMHJ1sgrWPd=P1OchF?m(j_tPn(u||K7 zE#RnIw~C5A<{WryHO=;QYrlW+qP{xc7jc;VFj=Zs-N~6ARr$DpDVmHQzzG+l0{d9F zO2{D8cK62b7Y?u=Dgk(0J}}oiJY9zFC5I!zY*5VwVCn%*Fb~RVd*998;(I(<0P$@@ z8cXZNN`veABd&Rk9l3;Dq2Z+bzmV2DX*f?4E`paKR=DwDX{*4-Xy6H04yOV?bs%B( zCPp#yQ6ue?pv6*yZh}b)8 zu;BZ?R|xWkhZi8qZB{XXI-rhwfK|W|K((TM;uNdA7qyI@2P-}~J>U8gnf@K%lvbR= z3&_(Fn7chKJ-hu|%Uz+Epa3lJWB8-|Yp(DXgPMDC$&Pp$^tV#R67?4n^b(xuEw{>W zz8gULv>u+g`>x&ar0Ey(BU6j^*X?^Bf?&Q5Jb8*01!;zB|D=^Y@ewe^E8fG)2};8J z)*mU)rYWO_#XeMi-DPRo>7){9;mGQG_2Do@l;ydLBxNzjc9NFid#!TvA&G{C6{FyY z_1N^ghZ00h+Y~RZJ9;?)Y-(mEplD*T_EbE1Mw8k4MOxK;(LXq2rzjk{IZ0$M8+MkJ z4uC+tGL~rrv6lhFYgRBTjwy69spnWTY3f!8z`B*Fn4bGi?eJqFznTZ2XMR1c#)mtC zLpL@VV(#o&;!+bt1uLu_gP#^Nu==;MsR$U}h}kZ-wrqPedFe z<&LnuxxTzd=7vRznbzAmfBNYOMW)wEI7`YO^@M>F+K9~#Y@}a6$PE&l;immoQl-@I zBk5XY`o!rF0;pz*2b4AHyYME1aM z0f4$ph{s8FTxe!!!Yf5RVN_j_f-L;$%E$}jc%u&jb_SsGYJKEBC62!c^vj!DdalOK zh!#Mu$ri!E9G3#8J_X{}0PM&_684i7Rqb07KuL>uz|-roMsf@)w?}+kOvP4XS>mA> zbXml~Q9n+%r!eh00pv|gZH_~P!2RXsYN@Sr~x90)4s?#i8{z2qW1w6==g7SR0;HR+0N2bbzp>w?5*Idrt&@ z{qPkE@+FmFG?in{YeGfHtO#Yslw>JRfHvYmg!e(mJz^mVOm^?>*LGzc@Rpwycz3@x z@%jx+msLRJGx)IO@NJyFbdseJhXKgs9;QIA6Zx|ZDtki})V{ah0h3^Nu8y@;=jlj) zkZhQ433at7r5pzgcSoaNwOHH6>1F(`yoBE$OD!G*_KC^tC3Jt(2hPeetLY02tW4F?^{5j^?!pxD^?cm%aP<-%?@W6D0`d~%#qkKQt4LZ-(qnP7(vr*G zd2!$6=X4vl^pM}0wAO7F86I?@%P!lM!yT?6a~SYgENz`V`OcKO=7Ua(ki1t!1MhWf ziXwR(Jw2`A3HVJ(@r}&I=q`@7*aQ%(XsaK2?1bepR>={Gum_E%ao$_^)P#3Nj@^|P z{ARgU);WaP#KcS8tKC~`K=QAoppIC>n+u^Ba-?o;Lmym*P0LF&Kf6ic4c zhZ8+;P9?)?(Q7q&%IJkqPwNwcPI-b!Ynj|>7>b3{4Xnyhe%75;Iz6^L!&kCSh=3G8 z(Yu|-o$F2XDjXn@$Pm89rLL^-cn!-|-2SxTBBM58T$b9e9&^6*scu)qm=?^F;u!q2 zn>hU&Oo&r*mi26J{6%qkOeYtBU2(0U=OwW_+4v;GrKdxU?0Yt21r-~4EyOpin8slN zN=I=vFswY51;ST1QKG|XkoP_VAf9=TyQIz6hEhD`_$9KKLx}RXISD`_(gh+P$p0ox(9eAt!*cl`k(QSDL8^bur8 z+fy@Ur=Z^9jEq!qAxu|&F5t@Fa>z1l+cWH863(Gca`x^I2tm$aj)dw^F-v~ctHwQ= zz7u7RifbljrXL^PXO-zr5RCZl8&o_%@M1>ur8gShIH%a@wo zV}RfyC_;4fF1olR)m3!IxWDgXOBHUDi74j2K|DF0Vau3ZcC%jXVo=@)9*9_DgFyv& zL~>44`a6krSf54i6Nm3cn*@E#d?BF%BROBPcl> z(RE3KddG10H~FLRFnSQt(LcDaZ=2NgzauYRm;Cx5ew}OXe&t4_M)ZaWyAUm*COnFp+&5<)P1&PFiB_e}qE<;Qk;-wOBgBIZL%f1zQ;O-Cpsm8!{eALEg4wRR8* z69bRRnWCt1&tR_C@AB_3b}ToOt9_ts?^b(k25&GjLnKvHVx_@+U(66W`NO>rlrqlT|R9ELW70=h{M5P03vwq`RsrqxN(v@6S%ZA?uZu zd1jtr167Sj9a%@A3Q3AX5Zrqox7!XlRzdf>rMoC;&UlJdUIZeu?G3wxq1`h>x6rXI z&&C`rto|KT+R#$kX_A=q>ft5Ra^Y^z`fyf=77KLS2ZmJwP9Oshz^61Fynyx97DW5< zk^#vErv^()tOQPur=pscT(BRSJQb5Pyz=~5l>gm~O0|T-KWM_e6p9;cP16uQ@;uI zA(1c+_E0K2l`qG32n&wPEMQ|EVv1bHV)GK%wFfiVQnC1yz+Leh&lyXE&pIcYMlop0 zu4K|D=PiHaJ=!>uQV7S643N~nFyMgPf3}u+`YD~#mZ*z7Fw|=WhF`1TKKy-aQA9Cm z(ZG+~K>TOV@}P(|77ST(8WJnqsEr#G%r z2Gigvs$8_Kz9DBysgs!qn+&fhwV6mnbLM#A^y6f+%omY5nD#>>8r|x%m>BG`pI)Ci z)5rf483$#6vO+EEM-WGbTAG?>mYen>I@|Q+_q@i4dc{76F-BYRgs+*?x*v;*B$!>` z`*|g=N{N`S#VpmI1lEeULA>%5ltGU^v8X?}+$%vf76A!tFN?FfRnEOha}G0{n+ZHV zmv8+zYAerz&CPpP02=)8+j1yHe(dTifl-T-yF|zqUjv9aEtgb+in=Hwb+sLmWOYo4 zUvQUyZsC2~`6nj=yd)B?;WPKjq_T>W3r69~ZwVlYl`+XMea>L7m>hB{7WepjtOVMS zP%O?ihN5m<@WtQkvj>(C2$gtVBU`x|c_hz@(Hi*d)^u|=_mgP- zCwFxKWsg{$+5%<32~2GadwHT^9%|1W6C+sc#BDbj6_r4*lwTkrrBHH9Sp*pvk_bQq z?2Tx+BqnT(pp#CZ7Qq=5`omf6*!sS>J@32|!VzjsccTu^DcHA3;>pRQvk2xo1dp%1 zGN^gMZwPdVQck{Gu&aI`40))iQp1Gny{;hTTauVzibG9-(m}9x+TAG`6~mMO$HOzM zMG(l1TIUJFD}INUl>0DZ=2bkEg{j5>&XfAWCqj{P^;52b*|CK)a#0`f*WW%kMky&o 
z);AX&^jBkP;>G$%?#S`)@?lu!`n~vV?Z3?R8~bhMdT9H9WUdQ$9zb5>y%_WtbCdm( zTcX4O?b$15X#G~}t+TTs0NS(%=7(qCg%V5mn-ikQi2?e&9t%wo%1ZKlaSj`R$zU~Ar`kc629RGGO3Sz2T z=cN3w0LFo#@l^?)t+FQeLsb}u_u29n2bg_xLe%CYj_&Dn@2-$GRJS82zSndDBQ|u) zfC8bSvg`v`>~TlV-(pw0GDhRa^xdq?`rNYwA;#QZCU6WX{lVkF-G^~~Kv|H|coN!+ zuCu~Y#DIzdjn~J6PRL+OGl9+iu%d1PQ_9#-#8k7e$B|0pwFU|V0m`@l#>iegKTYf)56aO}bETcXHLpsBGHW-ldL~O^``Om}g4sI9H`Xk9tkRI?lv%^g>$Tod zYYMt*jtH$=DyC=vmN8QnPF)IygBB>*@dAbr$g}uAFspY}$@^b%gMFl}wyw(g*Ofh#C0pKM**ZGmh9^`)@rMt@DcU7j%aWKb0@5&BtU!k$n z#5qZn{B6fa5a-PpM%FCz>Oq+UmG%C-bCs&zV+{`DcDNE@+z0X-pl!1c}7vR5i9{oa)~s(Zb(Sc z^IJAY;b6X8E!1dW1)(nNeFvc2{oOH_?qLh|$LyRv#NY7+gr1V{;vH+nN4n@y4q3}Q zB;}dk>H;VZlO^xUWB=b(0|nV&d;|K{Tr&bL z={KZBxr6u)KfYB=EQRaQ(8IJOYEcWA1c4zIfVT+-x6ofokg_7?U0VcfUR7vMN+xq` z?Yzs-2g2=Q$xyH%uiyk;2?6#nF#b~@OVfU@N=I1kPXkB&^C3S8%v_iUz=;S{Sy}{C z{}DKt{Gk*uG(BElzF6I^4z^`bz{?wim}~rp3=#!Sa>QX#@z}qUtOH}$-03d@>>A2* z#BaZ{uI#c2!bA^Y)RI=aGV*_R!8{m_PUd-vNrC4c*X69H5Kd zvmb0KM+uyo%{AeT^xYL;`qe46^b(cD>vR!w4nM>9XaJPJ4UfN%XqD>XC!aY~4O|?q zf{bYX-~)YDG5}(}1F``?aeN%>mqWneeG1A@g~XOVSeJmR(WfSsVFOFq063W^7L4MY zA0zJp+Z-ZxYL3<=GcIfGgA{EtJebhuaSH)r)i^l&31ldZ?%RgWJ@~+>p8@NTa@D)$ zfGYLs{w2VkT0jN;$$jS__m}N<rjk-N9#M2(r#fQGrT8`k z#X^CMyykvO=DI$*#yYp;u|0`ikUPRN7Zq0y%N<^a9d3gfuMXhp2NN3B19tu0cfNpu z2#VRt83{1b65u8Ree*jSpy#w47M-r_c$)dh9hMLix%EO_}r7!$=ofq+35KxepL*d zqzY23Oa&05YY(7oz<{DA9z1PPj^cKfLO>qsk%UW$137TP^g2lFO-YG_uK=?`4N4nR zMWbo$0EBrt2N<@1HF|x0JoaKGoc@)_jaP4*{O5xUt8c|V=Xxc$s#n^Ikz6 zki#7lLLDuDUMfd-(-mb_V(?UP@bYTEO@=rUO>6mWj4))0+-C>gFcF~ZM%SAKZ*twjCB{_*;PIwG zExhmqPzu%~Jhvvoi`l#mmii6x6w!2rTPUu+$T_Alf<;Bu&x$AggnaVELwAy^m*MH1HH^%8% z3kLy|(|ehZ_M4CE?_CIjvbvy-_sL$>5Q|`sw+rsxRQa22hK2dMg=b8mfV0}OOOjLV zvL=gxp!z!Qd&fPB4`Ra!nfDsVT*7VI0rP2;>YONs<0iX)NK4?xSY+Z>>!|uA{cQosT~LTo&~T%{NR&XoWPz}2Y_LPC)!&EAu2^w zb2z zb>AnZC5x%rz}?7<2_BO9A%`pPzmKeNl7)bsF7f2;zJ$$%g{(Qr}ZJ+7P3> z6=rCVPx~d*XM-S~TlOeKft}JhOlMTyEhsKL87{R@e1aY^j7mNL9O>H3HAm1@m6!>;`9yGJyI$R#wzUa6IzEMQ+;s!aoY%TR_hzHazp&bly z0?9a}MDT^rFp)WrdPGUT69{_@-aJa7A>;=wE}ek0=EP2r3RoF;^oy;Tg=IEKrB;A3 z3Ek(fB}NQlHG6>JELWiYI-+B2(9_O5`$3BYG6YQBhB9ou-#c)nm8e#aS$eiB(#5-6tizSfyu&tL! 
z=E-oF()u9=WEV8BTzU^niKG{@!&YSHpuTQ;-=)&ZIIL618&Jqn@978NRRKHhycH{s zfvjyET8T5x1}7mkdkoytvU!&@BF4ki`r*3SK{Th@Q-T!|tQVBohIftFmOlB-2UMac zTdI%QW%1Ik9>&5cF!Jd32s{8QJ77EoaL?k~W z3Rq&Z7{tR#Rjhg?FlQLfAlK~lApq0Ngh;c6;yT&a+aXw~Ea5q&xADX-R7D{OPbNiE-=cV_>Rb2=bQrQAnDQlU`i7~X3$$oJ{ zF0>lP>t~35x`h%vjEiD@UeYDU(hw9pc(L@c_aVO#orG;kw>KGUTGCXCQZ!+JJ=W5v z&ccY6oui@1><)?^L)-V{j+2=lv*Du9saffD(=1o7Ew&uwbyLd>z^cbJ3pqtJ=tSxS ztyHh7F~~WQ^0hl5_Y9?Mp@e>Y4{@SUrhZ;6YmweFBwa-I0Kzksq&wlwHk81B-XWCs z7+2rK)Z38PW*o6e8u&6v#hh-xmCibP!5G$LawAuFP_!}Wg77;B7@*Y zF_h{RgqAM7$ZhhPk;8(h*O}6^NSuu5`%f~kuiXdxt1bXTbG{y^hdy0dOw!K|1#Cn0Xn4{fB1H6H3E)8R$Saz zq(c1W=xi*C4U#A-dA&Es;G4He2F`W06S*4)lTN|`YjP1uHgl1A!&>28RlS>)7L;gIx9P^By@))V@`)j)udyVP% zG*m&{#ate=ArymNwR_It=yw~C>j3^6cCXnRyFnkGVdV1llNdUw8`TF{`yTxq==b9@ z#SMvKWr>urT|XBnt#hQA2^1XQuhu>ez6RLh{gtt9h2XG`_Aa9xDX5ZYbGe2-lmxKX zdEXX%;`L=ML=6xdg!*Q84bL5a7XnBrNgO6Bf0{CyQ_5~cz7i_ zWE^zZNOBUHtziw=F`#a;@^3Bu6ubb8Y2+T=e1q^r#CZpT<{< zzx#cI$L?%vt%+3U-N03(Q38)arfR2#yqZ01X^6gfzgRo{gqL*eI9F&nO7o*wN*0`k zRlgBt91(pq$)HLE6(j_@>-@Ru?zJQQl@(+*tU$Zw=Uz{-1DQ@x9^ocBECBjT0$@nfep=^BB6E5UI||)k^eJw*y#5lQZ>Y@Si8>!59((hBevU}+omT??;Ih>T zB`?xptP$GR%G)n|Cyr|W(HR&~jm9F8qywtF zSf%dC^p0(@ItzEm4e8eLbJ44= zz9d{lv6QoC)@1lG9{hYHOeBx>?S5>4*yq7kuSKfaQDTMX18c6As6fC)u0zn!1Ob zQMNN;angG0;;V9MM4?EU=9M!aHn0QxS$4JocJ`e3dFV9Bi2Z8hIhpBEN8(y!lwdbo z*L|Tz|2U2#dk@n>vh6rd>^thReQ9PJjWG^m3gfhd2HC95&^w9k@A(E0@j6ppp0j^1 zL43#HuSVwt^(QYv1u5x!+;*v+45y4duTyiz`!SCQyiJEARqpDGKNM-NnbZ;>4rd>G zD$tWvr5z=Et>d}IW*N#E>Lab)JmpTVVD!-o6isHCs#DbjB!I)CWVbxa8^U!L%u-QgH z57}^=0Qn-c7T1J2DF$tl+l_SFekO*qmLf_%`K~+uCQL<)(hl$6orrWK(EIL<8ouUD z_JtQ!$y@40tVJ1OBC+nIL)yv;J`B)chWCr(QQXUZem0j_*5;)AB7A!0}j?k&j~bAKw0^n_UR<|wxc;e7lQM5(!(hEd?MiLAh4!1 zo;e2p?Lw9?QEjAba;H-F(n_1-y^n;(3vWEd-t60#h^KQBZQDaVbzN)S8(yM153FEz z7x7jh&OK%2Fk}aMcSlCFRp*Yk&KHJ%;Kn>Is6(Aki86!7l#YWde%`o7tOjMw*=Z=^ z&5>fP1mQdea{ucj@rxpWwowWp5+Rz8Y@Z}1Ni%>EaXgH>QV>{b3En^eC6K|wYNXBf zu9mj+wt0BllTEGckEmTGnB^{jTCvXH8BRiX!rzSiFvmiQ4T;8?XYdsVGZh zPio@z^!^7tOSSO-5dqT-P{Pix`i&O_CBj}{Upq4WFM&!kGg~^vqKin?MBF?MuyifWC--9jxF{WI13k zBrZ+=i!cU=QMQ}j14OllL0R1!;-B*s%raEmkP#Wx3zfwpVl#V#8vK6#h~<*yV4@`y z8v&Y)6_9}~YCJ=4#jM<8LI)$w{W=2`A9oFN zYTnWUv7SN}dh$BIs@AZ~As#835od!QlTzIpMl@cJyQMn(DX}xH#2Rm4JN1noUO(li zdfB!HNrE2MAU9c}ldubQ$g(++&J~~OJ%}{ zmjP(lAJ4{#A;qSylK#7SDXd0Wo1}UiOj=V~r?8rfs4%S1{-!K>mgF9MZHnb~m;B;% zypFSe1R8Cc$cq~Q<8mlF6@l=TKVgpLi)tdk?6a4Uf=mb(UIrFnti}Jn-{1&$D?m#v z$FhC`@*Iqw0;*?LxzSz#W^@FNsuHeAsW%`WXaazw6IwIq^I8PaInL*Mv|gYLNdb6L zDgG22fHqeHOap)czkbXSZv#AdPeu+WP)u(T)pMtLuO<_W=J6yuSl8phsPzTOl{PP^a1wprJS;8m2$qs}+FP$OM3jj(0di zAg(Xy{sFLa&J^Pr;El9^Rl#~|hLV}5N^}BK5yYIRkTXZHV)^F$dr5?zxn+bW$=_~o zwilB1F-VK~3nc6^K25}Lq)6R&XSsXgLH`^(s2%|pB`I4ZIQ z97sG4kDY1I$P5E~-Bm$1IB53T2Jjsy7%+dLLxaNlgJ8WvU6BHy9UD;g?4?R%5+1Ni z<^rVHe1Ea+mkfR8$rmjnD*YR9(w?8qtOE`tXC$6`%Rwh9un{x@!~-xnC(u<d*+hqixX;1`9sosG-Rl8V|`G%uDgrN|CXut~P<^cXHFROacP_y@O$>s*KU;>dh z?DYbm>&BStewVAqR+i6R$2n@+c%!?WY%AC)2g)(6I6A3~*sq<}nS9RF)cL?D!d^M~>}-`Db#~Y%gs9a0FIENfJiHilvSLVYgj3BiL5gAPfEiEK4+zD?kan5EAHJ_RzRhO=E007MJq)8aS1 zJL9zfv|VE{LO^lp$|c*E>FMk6x35i2DoXYlN z_G6?(z$d#RX_>K1H_@+@#z_t2f3^o6U1AZuK_FNSK=W;&xeLn#$ad|c9(Jg7Rp;aWNu!- zvg3e+r%0t!fkuT`iSHbS9rn~zyh*>3KMW6cuD!z8h{ZajtgyES{ zPlQ9K(>_`uL4-Y$)vQ?)bQU7`BCk#L(KyqYLY*J2qbdmo2I7ScudtfERGe4lmf}z& zMI!_xqEuads#drV3Mol;3HI;O%Nuf}n5WM)$v}-v(Nd+v4o6MwV`d>2n$7fdKPej{ zVe#SrqUtT6s@lHyZ{pA(4Tq9OknR-e?r!N2Ns%<@?rsDGRJubznnNR?f+Et0f&vyO z>VF>Z{oeQYj^S{4x#w*5UVHDg)|~TsK64GL-mBSYL06$*jE!?fp@Q5)Bnf%@to~)c z{43V3(|O};8n3a7FCJ)8np-XLC8F02C>F{=vCy)wn)&t-rv6*l>Wn_lF}&2sgTKeW zMBQ%yRTH;&{AL1e+S@$S{LaqXQ(=T#Sxi4Jhr;DHJqb46l;4q6>2t7kTAvO|?8wM` 
zrn#CgWSkMJ{D74`A^I6xgSzbRpNfg)=|Z%uIrAkPfmVq^RcF)YDkpK=bUWjuXbpvXs{#k^% zasQtBWHR#AJBvE2K0@r27;G}9_%7k{X);2n8FchGlx9up!K~zYP5LMZibSa#H@>X*LPvIl=w2N!Tb>UEVGCbwoGiL!+vyDJF3bM&?at6(H9br65tvW=GD26t8~)yI-(_x-b}aCYsY zg=BN^NJGI@q^!nbIM9&cQ{oA3MfC&CY32u}!2BrY9OEr}>=7mDm^& zVA_euqZT-Ocj?80fhSXUj(!cT4YNjtn|Mg;#4Xe@=g()A?X!`hGOhnCxgVZ|sD8YJ&9|-W^Bsrco__e_NAB_*4?0 zY-kC%LnqG~Gtr{)8odQ2fcvT)8*RJaVFf42d=w zDvcg!aRl4=ki8qvmrYpplkYkz61LozHn>=ar&>CxruQ*9zKvJ~+L#;G+yl z;rcq(s)HT2Zy{|VYvDI(CLI6NS%3tWVkAzwyYl&|%B^5hZptuHCp){RY(9=J<~NU> zJk^4?_-~Af>KJE0Ny;H5X;F2e1sgLVb}1>_1^HZv)^9Q-P4xv*Ja+77OB3rB!KLEx zO#=5`YHEEgLPgrKw=<}3DqevJ8B)(VzmROYDpOS)8Vol#+)Bu~toV$giTQBdO;OGx zk>9DvPJ^9nkQ+NiU-tBH@NUyj6-nJch8hUA5pMAbwPPISVM3u@2093Vynzln zZ*bf1x%ht(f(-?1tMk5?265r9>(lIYT|eg$eWsVggZ?<1!4bp{B4G)4rp#PAL~{NK zCgJB~pkzw7JU~qQZ>)*uDq%%;|B`xvIH1mQm!PDlyv3mfIIQd;@~oI{*igNs0s`A=&4H=Ylr>$Jz&f%-} zPu+VLgYp|NlDt}$jVPu#AJk%AR{vl412hlk*M(nUyVeFcN3hiMWu<(e__;`a3Q1NA z+>-j3712{bV2NZ=c;5RCvP_cGW!HR?Ppb=i99%?C=6yLWD1J3O`RMpl?wh&m-WC*L z!qizUZA8{13me#tX_Nt1G3AO*x+VAJEUkmQq0&jdSUq~V zwq+(CAYuXn!&TKs?HAw)y{)e|>H4Q#ymA5der@zb9(DoGR6E4&oK>@j@7@!?zBK=l zTe=&^^wGa3L3KHz?gG8zlgA(gsk}SyoF?Z?Swu)F)P>LDOv%MfL#VFm5a<|ZZ%tMX z(1UlP=Znv*`+0>ywv?T#OX^%p6L{>@=n}xNnM{IM5ls66fwfYY@X~UZpVNgtYD0`8%1$ND(?w;VjdShArn}k_@)n->Xbo3PoEm7{2BPN&V=14d#nYjV|s*^|9!XTmftmr9L#+H~B`$dWjNW zSJU1!0YXgEjpj}4P$G5SG0YWpX}#4Bg5!|{%C~=>G9Or*Mb^eEry$2PO(#xLG2XH# zcbzhcgqH@T=sz4p^F{qQzsAz07N7KBrpyHJHQT$c_|U3^>?qNk*xswpf0s$8=9x5ejTKFL^f~0 zQ6+Qt05$YP>f@o`F6RoUDh5M8Lg8NY_t#vayhiXHsY|Q_XTY}EQ59%Xn}Tw`v-v9N z?>SacVKPTN2qL#{Ai(~jGrL0_lSkPvzed;9Z$-F%1Ob}cN20q0(@_p<(-5~sKivfm zN<{AFb8oap28={LW~ZJae_~M!RobFIkx~#v$sR*t)=^930xoPpWv|jd6u3(Uc(O@X zYCxBa+d3Sr58^0j+QQs{B8cFdsDoHIm&bVbe$<7Nz|8O3c8qBH3TT^Vz=Nh4e|`sM zj{+qrV2a6p?81KD4!BTI9R4s;yRr&_hy|iH=+hcx$zDUrzgoZSE?~Ac(x^R1hqFhq zDBJk2OFvRS!O-X<%Q1*XWWFZJN?-?q37d9tHorAZ^G;W{Z1Du=Ljk)mp#`C2_l9&1 z>u!I}(j=O>&bj{Z5q6PzPL5qFHOH_+*3#qe@N!t5fBSQ{PN2VjT8p(*A3`S2=Z{^E zQMCOBG-)6Gu7y0{=tr0d`l#RLyHXK%4L9j6#5HKlRZ`mdlqW@N=5hT2;4$3_ih8}@ z$n4Uxx8BD@m}E;6*gt3Y2QpXhpI#7NNl_Pr@zi-kJyzA(9arQ75IY~pAgxqS16nMI zI|d-MQ8&{i_DQo?-KW)6t<6}@bg&Kk@=U${DX?NgVKd8j7oetX<3C0bOlHl^rvSRx zM$1%!>B0QHXDN4;1?9q9w!d(juaw}5PJhR#wqRd20VYja_O{Ey`#vzB=6iV^zf*0i z&Y4TyCuS5S|?=pj$eNPY2E2Us}0Q}wxx0sso5P@_LeWhpzpEx5W(;)PwGAEQzo>u zX6|w+??ZuD3JcSzyoG*Z%{rA&yz20NQeeSj`8@L0`e|@c^sliC!qZ1xqc5iNGREtF zDvpy2I(zJX*VdYk{3)jpGd)oPO*t#yo|+S6dP?7x&J%GyZy_~{``K>FQ&RA?;i9or zQi+Hf+1DHIGj2OL&uB3@`Q;8UM1E&5VMBOJ*dg<8CH*81dIL!}1Ed8HAA6&i&R6c{ zbUCecngIJK6H%=*`Z+0{3KFV^wV_usOoTg5SFWsI>^DY7O!VpjrKEel;~DF;w?rxUCXPx@?l$?9 zH#ZGzyq`7TYP7ba<-KdImW&(>N`IzIOq$&8fHCYli${y&BOTq?wV6m3XroJb2@^L& z*mprfr`MtepXD80o9c(p9ZrF7Eks506VjgL@#UKfclp1~j3HoLo%Uv?nsuzL*5P}! 
z%o40@Urm&8n~k?6_}2_Sdd^!76p3yZT_1W(N$l>pH~cWyod{69n!PjV>|khrgM?Bv zZ*x(Wsbj-Kf+pc*WJ<|s-HKcf zG-Ugkp1w@3eskyh8{!lupg>CB&{3;(mCNYlaW-rJBM%fRnmGox0)JFILl0#bwb#0Q zbFTg5=GnM8DXE0oad7yC$-o2Wwaw+$q}sDaF5UJ$9l+5o8-{xIRa%^rl0yYA&p7*N?*oHe+7*C5^nLDo*OV zzrv6BD&~0Y6GIRVyH!zco1EIPX{`aAk8pW|86~=?y#baA=>f zO!QUko_sZ5w7fn~tcx)55k3b=Cz;!E;5~3F`XP*6K^QNu?S_?v>=x+6?g%A= z3w#vG?_GEC*Pjd(vYYkKOs}tP#bo7)eVQh|hOOZtEQKxP?T#B}heRc5CU;uO^T`f< zsQujg#cZ2bH?A8jo&~Yp@9pqE?9|4HBYj9?C4$MzV{~>M4Wh;|-Xrws znI$zBEZKP3GHFArS&m(}rsEvs?aG@pvhp-`J$eXqZ|+WS@WeNfYFS%pJ&xyqWw2$ zl-C`PW25*B6qRqcb|nRigfQ6)D;*7vCF|?F&$WGt#7Qi`cuKtX7V&mE*}}P^0dFvk z!YN6<(eb<0hgxp4iNrZBbl9&M=42ax@os-X+|Y*>9@4Ql{Naar>R64t#9S@wWcTQ7 zhN(s#c&`ReOx1-I$;SFR91ItxJ{Vw|F^=dA8Pl5gC7xU1p?SGs8EG__mYEqMvZ7Gh z$fi9V&$_}teeK2kUcpj@Dp9Ybyo+qZB@#Z|-DiNX&F%dozvq1;I@RilfkBV%^3ue-ir)eh)r% z<8J`u=|+pUhy|}Rs?Gq7lJ22oR=v5k3RK7I);2@3;Qhx z0<)16YhIiu7kC*9bp|gEYi0IYu%o!Kj7bF8@u@zYNSldyYkDiP|II32NYnU|a>X1; z<6tDimaO_2wNoFj%)yk@6`Du0RUR`jQ!2KtDs!Z(=3lxnw#4 zm7`}Axf{)~XTMu+aIHKd3M;BFR>2bva^h|I`a{^Jd>OF3VE^s1zddwegFpHjjmih_+DrUAa@H4Km9&7In zZ*n)wc>E+<+x4ibLtx~J8e|-r0IgJkoWfUBwC+=eo#mholFeDOJLRS$uSrw(`SuCJ zPHr2?Y(wkHC9+?B`M(0GG*gld?CBjow_E&D9qb~BJeqL9Q5B=v@=Q+J_@*K7$)boC zLAO2B`=ruIYGhW$u9d)HRFTe?@X&>)kD5H{+%bBz*2+NcqgQEg@&c9nsQ;uU-s3S1 z*CJIB8;zF_M<2(ZCj~1_l&2$aj#R5+>1ef)b!!p` z#-NG2R6>}P3d{SIvx;NuRJUB!iWQEiH&QZ~6$l4E&WdoJ3yB<%xdn1g$)jJe$@3^u z`E@$RTj@iu1_!cUQjO43--;PivKIP6Q(NC5k@<b?9k-z^VXkOQ zbN(Z@VT&+cA%k*#G+B|LgLQBwirImg#I6syDCBc_O!(-Y)Ym0rzr%r?7wDAV({IFl zO;53|3UaL59s`SJJxKhzqiQVLrDfKL^;Q5Eh4uK$bs{kud4=1yeoji(_x(@TaJRpf z4BZ$WDE9G`s|oI{&K= zEejU0qR0Jo>dHocYC#+o+WTc)exDa{yvKys*1A1PIW@FgWkB33SH6IZcRrm?xgmm^ zIF96{Xy$1QI-ro`GoiZ4@QA!oa60(LrgYe>&Nis@_FevW$Py=tT1R)Ei45mg`!tqArPhRTFAYTXVf}8(A!pl+u|9 zE-x2sCT|f4f62-xRXGcVaVj~?yUx9{)V)_uG}~UD-#yV3%Uf_O3%}jkGG3g)UfN|5 zWs}1djx3xMZxm|hxlUhlh>DVkAo7mqf!tq_G1oh z^kpg}qvzow2Rpa;Pb9*{+nNTK3(41liHeSi7frT#h;1#lHoq9P94IJMlvnj~U6EB~ zXhN2ov+)wfv(nB<_+SNg+OrE$=_^d}-;v|tK^P+MF|)tk^0HQPD1J}z1J|#$oG2sc z;fBRanR49iFh`SF@s~EkJ6qb$%nh?ZVixX4^edm7I!2Oy=Dl)fqccVV&nD=n1F0Tm zx^0aLiwWX10X5{q)edU4st>dOrNJk$`$jK4qj7q%Ny$rFU1(7gpLV&4udmYqlQUS~ zD?lNBBQo4dt3sh{?BoUYyuXjg5GD_{-Wg%@aQHVB-A2?j`Fg2apj+Kf{+nNSJtnxy zPuh;5>p*nDz@KJBba(mnB6K=*L?1&omu54KInM~*F6Vf7w*HDzlrh{|r0`6)^b3kg zAHG<^7ZPH+#s79m%hVg4O4mq+DblewTV@*(Ij+!`l6JL9GH>Wi3A8rX8nR#UV8!Ud zKU-hE?38hLCRbHhp2BRo^Zl#w2N3mEpsNO#Y0$8A=K{F&+=yBk(D2+q{~lbI z=V0J8puptUx1wMzfl2^M859jVn-yL?`_-D;Ml|qA4EPn8zG&Y&%Qdtt@m;d;`#@CU_ZZpprva>>n8typ9hz4zz!6FO*?L^oa{`Q17_H43`+dtGF4E% z1PYP}cPlt9DQnF}(ege8HQ-eXvGzVR5;KEL;6yu!N7UP0(ekfyksJ`zVqno_TN*xn z7zqUq;NP4)J~qq|pTgrfB{S{W$+ZRn*iA?qf`ZO#X#_-m$@GnYDu7dhZ(+8-{|91% zrb#OW&lHY!Z9w-Y>JKC#0BR!0c{Bp!38S*$g)XOrBgEYE=T%4P35Deec8nb1C=>+l zZ|W$>c_QFEf1@Q%e;Nv`_^@Jtz+Fq$`1l70mMYktYv1``VgenNEImPrDo-&5{m2QT zT}%>vIA{w6{3jausHtEKgZnoN41aT@@61GU_WuFVXQJJ+2(kY)uYgZRWD=kxYn|1! 
z{xRL!BihIsz?h((OPs*Z z4`tyR+0x0;HusW@L{s+$f+k6YhNAS}KUO$P?#Qj2g)_+hstVJ}{@_zVB6^=24o}g@QFFEs@$I?~7=ImOVRKnd2~)Ob7Rua10%nqYFDAq$FlT)bO1E zFp+T0t_MXMpT;XWW(7#^y^0m(Qd2kb436hzw_TT&4m}2gRxTw%@_xnn*=5( zRCa>`3p86>NB^2ieO=zT7I{HijwkNV*x1nRy<|W2-URjFHob^*{0_aaqsrD;3NQ4) zg7QKHHlFu#)8OWr?42K9fQPnO+YF*pbgg!j7X}DT5d`&LOevZ@=$wc#Ns#gt(B76f z&@DoQ-V+R$fl>}b(I9LQont(^1&uh^O3_U~K*5@R9|Q#tq)$-npv;;BF;$805CI~- z8_g1d69O;uz0;!&NM(OmX>|i+biLT`z<2W-04Tl-ZoI%Zw_bu>=M-uOxBihlL_8os z;mCUgOhR|8bVV#=ZYt2bAwT>Xl$te#kiI6}`2w{ys~n5gdHrZK85(jZ_SG%Hx7YY* zIP+{y(84^>aDi2jvgXKF-2|;I19SD}5fte-aQkjS!ryxA{L&# za`*gr9F0+58pV}v|EIai6Ph%7@%QjU|F}yf0huw!-&|G!0Uz@*`)_G9T?}SMt`t2w zm<>g=!`qQ?n+8wcEIl$tvkmIdU2R4a-b3kc4XO|qq}DMWk5hgCe#jVcub2BQ}g zg(^~o)c6(umXgyEuv&23=ShMi<>Ir{_4*^-eh6RJmv{%j;aBZNBQr+>hh1CkB3 z0(?SrgB*O3B3r4@FYiEmQUUrl9S&y~JTqT=%=mNG@?3aXwmBnoJ=%j(`r87qYVbOo zbi$fFJcFAZcs9w(^iJ73~P7om2oBW2se~zusM{H{-C{`e{#!u}y-Fi$3IA)S0K{CrH~(LwGW67&X-PNu zSP7oR|0#q;w7GAxZmum&%KEhxB{F&`<*U04#E9aZYyu)8*YRT5N zs*&A?3VoY_)z@Q_n*c85meDK;sGn6WFUHzW0694F7Y5 z+yeZ=06568wcGj2h-rdLjZrm2D}GhbJt&k;U$w(kK|iDm)gFZS3USH?cx%HKj1O#dI(#OyOKK@KGK}g1|%Q z%E4j-BFC61^LE(MIIF|h$m&gk`1}71s6h*it=6~s$4+bpvxsdmzP-xE{v?IlFcPDC z77saLv?9bxXC&MgR6ymPDfxivXACH&LH|Ne7b z@_1=D<9ucoaeV|9%Moar5eC|Zrsd0Z6*4y0>+rMrBHW$PhzRu5DW&`i@B~j!Hm##*bqX|JPTaQR;`2Z$Fgh&4-iVlrD9AOATxddr6P30jl98o zGHm;x+4Tf}>zXOLvB#fZpY8>{@~AoRd}!7FUKf@ISi!hxLx2lVB<|S(^YV=3Te*;0 zBNY}4ELda1fQ|rbOh2eL8{1!4p;ZF04Bp5CfuV;f1Sx-h>~)^}`IE7$i|W|XO8{Q^cD5Wu1Vh4(=CzIV$6*Gi|z%VKa%PXlA(1ZLiCtN9asfSrpf6iIlU@HQB+ zkx@rLkI+=+tG_|A40`ZepwuvbNj&lbG~zCfoZ1DO*}8)#?5*w+SI_UPT~pH9H6`pJ zMqe%aeH`?96(6lo?n3kZBg}sC-poI4KWp81z z-1nj|+K*>X6*}BB(U>%C)X^+(s7#S)3HUB~GlsxgbSaJIONZqY+-o)iHbig&@i#X= zbDj?)vu&XGeO-+WBu#+S0y0&*I^G!iQGk|&Y+^d<+ch$PRV8a zD5w&B2bXfk7nsPmKsUb8e+?AszzI3&*9!$d01&Na-V_%vc}V-QL%XNz(0}DMgv}s_ z>48Z)>o+_I0474_X?1$Pfi0f;)WmgwfI)%uiuE19ltS%0=$rN?&<*}T&cI6M-Y3vF z!)BmqAFRJDuj*inkBv**RIkDqw7(GJnvR~0ufjg1m{*D<y`%o|)el;? 
zz(d*c6Mtk+5)8j!k;B;xT070Vbr9SNUUrDvcv8L#PkRwJ`X(=-uV{s05jyfWfXODKpk zZ@|VpclNC@h+F2BNpIDfptw0I8hx*^y)pSHgO9JKqE{q=3E%u*V}Ou%+_ief$CRA8a_Dx!wNhvB}uJF6}H>zX^OJ3&fLZ z=d2l8E-DKSEA}Q>p(XSYd<4Ee6CyP&oRr~qiuIPKv+1&33(L*Fkn?Utr>Dr6y%d@8 zTkaLnafsLoK;Pr=NI-L!-e4++5D&esd%i&P`^Z1Q(CN`x{#no#MDt#da|fO9 zgdh2*{mcvv_lHI}Qr{t;usT8@fop>9iw-;TLA zr|=c)+z*}^r3d7XteVmIvKJQ26g_ky$B)9uBltue*b!%VZ}9y{@C}Lm$U{R}^kRtl z2SIqb)whqldD}}&W}@EFV(9*k?X4+nybCJr_6ls-gq+U%%h7gWhvFl2Y`8mZfH6k1 zNgCpGq)1V{L1|VC!YZ1l z42FjdxNWr6>?TXFkv6ogDfTDq;O=}*L*{nw`<}dh^A|aj3tjw$KvksEJL*Uf1YH`r z4g#O$25{i$V&v|KG3}OE7TxDP&*`APR~1B=Kv27OrN`D&5!T!AiEp5s#ed|FUUOkS zOOH_4aHb*L1x{8VZaJ}+*78WOg5z{Tl8E0Hm)kw?^61O#bBI)0;U~IL&#{vm5{wFG z9)hnWn8EPsH}>28P+N@eh%ScCJY2Jib(XoaSqTy8v^KtjhU!c%Gl5UIl!!Xn!ve&G zdWnb%>RV)cvu3T-*sNxDU@WMTFqJac4yywqeM!6^ElV#`v7ZcCEhIc@k?5 z%FhJUtpcQf zXwN~*6h4+R1DHcR^7|Wl-d|R#!~e!6v#d?VsmGOEP!Q{ng?SF^itxi3)89LXOt1dk&iZh8xmf3u@ynAMuqY6yf{h zB@CzJ6wzkM+W@box6znYuri(@R-k3tnx~UBmdqb}M_`&CW1QJBNz&fVjVBD)UL0)> zkf-Qf@~`yUM{zZ{)mirbrWn%;mY4Kfi^^~up)%e;w_q&FWZ=H_?bF+`Vzb=WR}`I` zTOAVG3Q$5;o7Z1N;zz64Lr?7Kt$~7Rx(~g}2Y|#JF1bXucO6n+UY~R_hbZ4%)Gpo@ z;2?i1a~zYK+Ko!FbVwHZJZC8r6)D|ZfAM@D*e@AXq6)!qN^JoYbbOBcy??r<%0?ii z@n%8TMD%CEjvKw)u1<46gNwG#@1Jm2vM3!KSkvU*9Ci$RMPZkoYB%Wejs`pXwB+exh5B@xe=1af_C(HO;S%E=J$liU7F&1Snan6nX zHDUdcXV4sTc;U_K7Tzix^7zDox46iUM1vnof6AJ-cfJCckMEyNt4SYqlhHfO4Akbb z;zCDcS(*G!gPWXb#5QHqk|o`Di=lL0h$qB)X?VSx_Q^`?s+sqdCH)<u1Ig0q0KE!mN@C;2+ z3v%Sn*6Kk*rT(JYLC^TrG>b5CN6J@89+rGh7un3BpNf&rMB~t}MBvS;|LdigjPom{onQ<63`v2ZCK8Sz&xg-t7+;`()`h@aTs#k4nm z56``t@WO1$owbc;-d~HfN=?VOOmlpBV&^lNug}KFH%rYGnmD@%XR{SHvvJk?LI@pK z)(Zwg#Q8g4!_CQqZaTBQbxf1-7fE`jc^*n&(j4d{$4-$nkusjbp#F}!B9T^<>XJy( zbQk_JeSGn?@2KSD;lLPDB2p(M(>fzWtBrq;QDPFtWd4y#OXs^H@hxEMJ#Lozs}Q z=!lkL=p$!(pl)zT8=->LdzE}Cl06OIvmzwk!T(r4 z!7|CWB$%a67g6x*Eoy_mgN2ar69;Z@fO3@IwQ9y3oi7}UH&A@$)cz?)JQPV}$%Vz} zSU5w&?!zp1sksOBO^A0PuvNL>u;>_U<^Rx_wTS$nKR#SdUE~KMWCU6)B&!wE zl&Zj)qAoAc1r6m>J3jU;6|qQjSwh;Wnk8g+8cq-^bD@&$1(iM}JlGa&(3B_C{59BN z*{-UDF?uj|OKkxI0#6ne&s;rh*i|{Q4XlgY(8K2$s=;@4E5zUn#OG=w$Nqv$9e*fh zMpv5L<`_-rboXqN3>JZF(_M6lqD0{gInyRsBkPoSWl`KJc5wHNHJlh3eVx8B z#K?CDP}2K4_`Yre^SzcXSSD}Kgsnm$}>C#)R*l{|F!&+U~(yGOLmAY;X3$kKB>6%;l&-;YG9wgB3(=s8vXdF zrgSon9#Vd#LA21Kxgfjjq^@ZQ+Q7^|nqJ9oPo3C1)llBO;o$SvZA^NR@ta*s-BqlD z(Hm2+^9#&9G?w_c(!kAzb-SE#aS7(#(;n^@dH1cn%Ou@byM)cEH2U#IQ8SQ87PuJ{ zxZ*-vjq^?KL8kOQ=^H5uu8=skz0F)1yy4oUc0wTvB-wuEar5nbE13bJx?sP5=)DGJ z2neiZU(PUyUk{j9#A`k{41*l>nUHkr69?BVN(-K+`|`js#ML;tg6%U1u+3u~)*T3} z_9}5YJiyNJuC%`6RpP7DSD24y7tSWdry&WCrp$*ap%Ne;( zbeGVYYhl*Up&#n;Fy#*ixbh92gA;W+V1{r0BE{lYlqvOlCCCscMK3mhNum7P<)ukc z3-&NTCqnk)bB7<4rcA-KDthbu^tfC?E$5#{`vJ#5RBoL2r0xdii3`@Z*Zp_G0tfM? 
zV3HiCUG)kIrU+3xlAeWNw++C?BhsAKjNtW}*ehKQ1)+Et1Dd5-G_BOP!qva;v9Jva z^X*n8e0urIG2G=#01BSBfIK?8#a&RZe|>TD-(Z(C9NqIdW_$Yf_betu(&rcGM4}bA z-w$5XoJAAf(`=d*(x7yqMsoWoQP4)1Rfrk)4j43-M>^>sV0{C2ge(`MQw{D!yla;T zp*N6Wb@Xxo#MIy4J&XDj=a|p3M7*KFBfl}g^@%i(*OlmEW&9L`y|^Y=95 z@Xx)e#VMI1NgyVkm*TZ4j`L^@ZvA$#7J0+~f$yl_%`)kmT;|u-b83~))Y^C0j21ZF z(#ICTU%=&x_bsW>eAsVE ztGqO1%t8X4IGiHV8nZO6?vP&G)Y>*;Tr#wKTV2{}#LAekA*GUFae7k}P?c7~Xs z4yRl?aH4tk;Rd&xW@FQc7%4<#R%RA?_e@Zi8v{o@sy>u?kuG$5GPYB}lgMjkh%xc; z4j-DrYOu-u5&YOReG{F_4c)u+Zi+hw4gs9WslhS=)Oswh3eVJg8Jy2fafL(Cj*cZh zm>D)M#(Qt`3u1Pi^D(tvBC&whp|u^*7U#B2?(+beXU?7+VdWHJ;#JU9DhZgIzLQol z#XC^y-JbOO3h^E~D1vk-TfsbL&F zU(Ad$TI3xVdK1f@+e*2}W@nn!Lno7*bwT+9WRuYCrEex)gDDXyH<3m>^jmZN;$6>g zLlJ|$&Q6xXVG9_oF;+9n5_E%u{xV7KE#jHeCMQr?X=ws11vFBLx%uY$mLMC2cBA$4 zf5LQyL42yap+?i@l$Z24>Jr^jex*|(orEqt^Bh6=oOCS|N_ZvuVdmvtktN3et-^;R z$JXi>aDu_9m5_f)P{+(Rm@+r!V0fESgp^T=%q0ro&10nrYkgfi;B4>O2_RaN%fC2% zj>&lvw)cvqm(PXd^WL03k~fieerLolAsO1d(vx0W!YUl}(y~Ccs1sC0T9NqCoNjZW zbDFS$rxjW88e-S}V>y}`t)O^k%ZV%Wf-?8#)O=&QDkUi@o{6%A_R#s!_{)(MO%9nu zk8}t_iL&@=4$!#NTI~0e-y-pyXh&8pR|4({>lu;W%EZK`Zsh8c?IsrL2IuolFkkZ~ zrH;+bh~O~@me;1^3PquyBzb7m28jdsV<3bT9TrwLiL)%2Vd^e>!$gDdHIvVY+{<{z zPRV;%nWyBkqjiMAEAzr~ihP&)m4-SD^%H}qLkuIrcxx|gUnySWcRD$h1oc8C5bS%mckF$E0P3;B8IVN zC-_>9^M_Yeg`RZ37dc3BvxJ&!Z)-9|?a1fZd-oF9#_bf&n|UN69Yn;~pXypz2fqj# zk8>c?w4{DBcF$?c*~(d><6_M==aiw+cjqesSff>vfnMO}$nqg6{Y%Eu=S zq4gzeU#6!J#}3}A297;XzP<7kb$emA3HOzBjT@&_#g!{%B+?P$pt-M3Fr#O|4|%j#q>(EG zJ|`UmvUs@r$f_fY)^MW@4vZB@ppokoSL5IIQT)`xb9t5^4<&Mm@P$>IRf}cL4Ia*H zNyatPoJFjjq7{|=v8YNr2RTbUab74wp^JpY1Qx*t{wjo%1ngMW&n%+ZvoOn*4MaY( z-=}CA;Rr|+a#WfvAY^5|;*b-XKZ{YNlur2qG4$Nf|CEukg|m@@PA&ezMMo&Y{Q1D&lrOAcvL2( za>IO07bS8~i@4k(D|v&uebx^DHCs)JSru&q6^;07mBgS252psO+*n^|t7SA@^XbV| zy^n#ADiAMM#+34;RaL=YAi%uj=PA@65V;z%iFhvbc*vgI#A|7ootT;2^+GKrcC_^c zXTf)vH*U$rJR%_&z4p--alkIH_&klcNm_(Lgls*AbMDGbf`Xu_wPJ_ws3(olE+3~8 zWC|Y$-F2r|AvQE4$k{R*?sjGW=}RsWusC*6WQBmTP?0Q&xdIO-idm+-&RTW#{)grk zwl`7?=@S;S_*lbANkNPbsqV@P{%j|?0}quEl&^@M>t`X2rXmKv#GKyR2yWQTCbIM7 zQ1Hl4DH*{`c2~ZJE^FntAGl%b7`>=e`R83Y!nfy|SjQ#sd0ODPOQ1C3+tYljV5)jm z%XC*!yIVX>E2t;=c0V z{LS_y9{2U-7mS?;^^Y;GoO~1*^^ZI~d0Kd5W@zkl*`M=q>sHHFq_v_Pjn|NG=m@#` z{a`-Q{f}Kkc~0y6MZL)^!!NZpXiQlX6Y3MtlJ66QQ4rnSMNA3IfN)gkzc8l8^(I8jJ_*}>E<0D-Kq+i0zP zCu2%aGUhg##bi3dPJxYgPgpBQ?IlI%V9A*eiN^I7`J*f)cPar>pLQLu@UN)at^#Jv zw?eF8D8AE*@n@%NC60rRax6LPND1NXH--KMtanGgWW05?INBXNNO1n9ciqLBM~Oj0 zKQKKk$)RLBo05hJ?&#^G14)2>hs*v4zqwkz1v zo!mEJ9$YP0eD$s+E=O0dh~=t_-uj8^gKb@#iv-F-6#X;Xi9-FuxqdiY$-Ds}^HUS_ zN%d66seu7!|C_g$5W!ib624C!dW_T`6B7-@P$Lj&9#d7R8U6Y`hUPN5I?B1 zs1&G1^%@?Xwd+^G7OaK!erMqgZXSJ0a+cX>#dp(AmS%lsZd+NB@?F+`nhrJvnTLMWcFkE6HceBx--|C4Hw8kE!l1IV$C^Ec(;+V( zMf$M}_})qp`*M#ZKQVKH>(@);4si|FoWpYSadgCeKN}>5IQ{{gt+ySCB_()mxANkhP?+20ezfum3U6INJOUP1kT(_tH**lNjl5hm zD4r(x4Z;c={|Qde5uF`v44-NJf=XEk#gmS~$FS=LjU4?on>Qdfu?aoFl0LhB`QD>k zLCqsC+bQh#0P2rXxqFcP>6Mkt^)&ss3=MLu#50!6s(TQof?29L^c1NEetG^o;5h~& z$!SgD^V`lraLkqf$DT9!gy)M74J)CRJdfrHe+59x=zFw)>`bn<>hs|?kc+HkUS%|X zwvs9H+F8^wn*%Jy0G{Qe0f5zA6Y@YQZFD&onqj!@!``W7Wc%suveHk|My|{s0qF)|)1hqX4(Sf*P)gm1BHbuRmxL11A*HBDN{EPvGzbzF zsepod$Aa(koZtQX{&An@-t%}o&%w>wbImo^oMXJ>9q;@2`R{ZQ8xIz}YC3spwIJ;k zlLUFD)0oWNAHG2g$0#e#HittNQQMx_>9_E#^VN%t(jUZBYvi+%!g)ap?9t2}yTq-M z551?-&6G!`8a;dQL#D`NAH4BfQ2TIq;G}@G&|2&CdClJZ5zv!h6^CUB?RByiOOmP0 zYNn@K=J95ug!7)YecIHRKMPjI__i5UR8v2JW4Yz0Bfdu^RTDyblY1t?K_OJ=RT8`M z-U(TKN~^bal!Qq*X3RT8zj7FrE+}j=b z>mHcGnp<1Tq2mXrm0tj5rR(k25iwD622q?JF|kA3kB+e(jA2DEDz~*=agE4;Cg3Hy zO0`~M_vc5llJ@Y;J!0HQrtRfTk)WEZ5X3Za;miv86Qq%87w&qImQVUs91jv8Z}Izl 
[base85-encoded binary patch data omitted]

From bb749f675833c11063b9d221456f2789d67ecc73 Mon Sep 17 00:00:00 2001
From: Xi Chen
Date: Thu, 24 Aug 2017 11:36:54 -0700
Subject: [PATCH 100/170] fix alignments in graph

---
 .../cluster_train/src/paddle-etcd.graffle    | Bin 5557 -> 5578 bytes
 doc/design/cluster_train/src/paddle-etcd.png | Bin 50387 -> 50377 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle
index b4be06a0b1c6ba4a84475d2e5d6217b6c259bdc5..f973dc9b9dbf72e9bc31e2d32822916cd281f8d9 100644
GIT binary patch
delta 5458
[base85-encoded binary delta omitted]

delta 5415
[base85-encoded binary delta omitted]

diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png
index dad67a277296ff1719a968abddafbcc1277721c7..57981ceb4b94f0f7d6dfa63f3d28c0402bf9cc31 100644
GIT binary patch
delta 21109
[base85-encoded binary delta omitted]
z7o3@<%Mpn>2M1$|BdN*l*`H18mB=OTd=;K})$pnDhGe~B5e+_joq-tvq60;z_yjuP z0&FG}5ivmEk0i#XfXngzntF@@K!1D`Fv?OM=ZZrQeVNt<2lskciC1Ot)%<6-moVIR z;m58)42R(RXRu~MCvkymOxxfkj9=HW&x*^;Ctd@X!xf@*Xe17W;JlG1LB&sSzR;p{ z{2l2mKMhBxtH2F!;@@RlDi6v>`sh_9$jqmG`wlYL4~=!T=3 zai6sycpHQnUVs4CV<5F|?rp}+U}p6ACgueE0%`X<;QqRkPlB1W@ET+fGT>sCS4nce&yt)hCcG2m9B*sR~n z1BxoQhD_80h~Wx^bHcUr0>D}u0&}0zneli2dpky^H+GI%G0YF+=&;qWdVq_b$X%D> z^kzpsXs;$oeOzD^p?~eBU-JH!{-7Cr*GLpO`9olJ$ZrAODdZ&U6c1KO7AwYBCs+&~ z2O*sq@Xb9q1Ma~oVjY3F!Ua%zfoS;DUdD*0a0~bmz!V0XOcRLA!EIucBm6XAI?QwL zC*5k$=tep(fSq2$6*TH$fgJTprfJ|Wkh9a(GY`k*%gw?{RqmjHK&@gXm}9PohxB<$ z^F-R4d&=_5EPmG7C))gbzZQCP$T(eJgx>>o7;r?4{`|i4yXNhZm|nMk(T^i!c4+2O z%9x=S(%w?!cJf=-g$}-a_ZL1UJYR+Y4wdVq_o}K>M zc8=q}_dW33dw=)2&;8x|{2us+mb`8a861iSod^mAajJ%~op%-!9wbsI4`;<>)z1}m z&*t7HN6f||*Ej}@aa+uGhl>HPUWjwwd~&SC^~BnVF9ysa&q?k1%zaM#&WrbURv(Pl z#Gp9EDyA>h%`nxh@4XK?q@8~}XyH`m9Vx%}tghc>Wui#=5-R(My1fR;wf@C3^G_;d zHCSl6d+ots^@~!*g0Z8yg++oit1c~Ujk~JrY~xVr7L&)%l3GR_ zCpTO@7jBW?FH5zsS;_h1V^tHoC^Hw{7$o_gqZU#%7H3WRP)N%-Y*YY=g}Q`b(d>zW zfGl2HoaD0Ndqab%k#Ra|y36KCIjYjAt6ADS?K`Rh$ESv5g?49BAQ}wj(Hwzx9_7`R znlLgA2^HPgJOn&cLvdn{f2rI=@SYmh*;gD?tO&9YfO2xPq@SBh6qi4cZ$eZGY~#%j~~J;+hpZsUN?6nGCVM71rd!fIZ%ol zPu8EB6Xi3UW?hCxaqTF4jhTcf#jfpJijjp0KqOtd&xI3m$&h88Dyj=XCQjN+RL)t% zK+k}#M>7SP&;`kUrBocnF=&l~02A{SZ{gf*0R{xM+U;9i7C_G04Ls6wN|`)41zKZ2 ze|+ZyfU*$X&YMW2t{?R;pU%L(SQ>aJYg=;06y+{wbs93sDguA>hivM0+OpKvlW=H0 zmcZBLYyFAvw>Jsv#*y6bo`^n@aB*iQ)Cv{qxF4h4isEhG*9ez1m`oyTvaB-;4sBDJ z+!z~MvutW5T+ju`zT0RU5BzdNF!0&Qp8fa`$=NbsSg9dgE0Ac^%*o(7R7vB!;a91U zL@%Eu9g9;qF`Wt+wlV!MYs|IaCxzj2liorWtg|u-y}>NAKir*v+M9cAn@S%NI(3<+ zm!Fws+g(w0)3kafH{Xp`)t1~ohd8}Zm_}V4=vT!6`pqLOpWN~MamNm8!=GXPb% zG*m)|=8i4^qwN)yTVNn`IKi$N82IW7T zSWvbO3m&RqT2swe((dCut1o~uLRyv4cGwWp+0IOEq$Ki;#XP!QeQLJZ6@7FMDJ z_qj8I$mjxsy@AN+0)l}OA*wmYpBQWfnu}k z1#axE6a1qIk&(iWWDqF^u}^Qcj&Cu*3@;2PJ4>k`KtQY>&Nu0xQ(*6x$@kM+9Jead zucddU54b4#RiVh6I-KfPaS>g(`vgSNM5FidgF(F5j>8Fp3rif%ZqD^7z*IGc-qwrW zw2BZrJ3Z?)2T-|Zc6dMKzh73uacy7)XX-8`+E6%zmCip)i2Hk!)<(SfvilY@Hy z#-`PEM$2skiYM8vM6xFDK%~6fW#rvTq8FwKAdLj2`LIZ&iIwZ{MFN$rzq^jaBPm2j z&hEAd+}p)MzZ)4NkueF?lMrKl@EZd Date: Fri, 25 Aug 2017 00:44:29 +0000 Subject: [PATCH 101/170] with in-place option --- .../v2/framework/tests/gradient_checker.py | 21 ++++++++++++------- .../v2/framework/tests/test_scatter_op.py | 3 ++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 8eb9f3f073..ac37671c77 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -32,7 +32,8 @@ def get_numeric_gradient(op, output_name, input_to_check, delta=0.005, - local_scope=None): + local_scope=None, + in_place=False): """ Get Numeric Gradient for an operator's input. @@ -90,9 +91,10 @@ def get_numeric_gradient(op, # we only compute gradient of one element each time. # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + if in_place: + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) # get one input element throw it's index i. 
origin = tensor_to_check.get_float_element(i) @@ -102,9 +104,10 @@ def get_numeric_gradient(op, y_pos = get_output() # plus delta to this element, run op and get the sum of the result tensor. - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + if in_place: + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() @@ -257,6 +260,7 @@ class GradientChecker(unittest.TestCase): output_name, no_grad_set=None, only_cpu=False, + in_place=False, max_relative_error=0.005): """ :param forward_op: used to create backward_op @@ -289,7 +293,8 @@ class GradientChecker(unittest.TestCase): # get numerical gradients numeric_grads = [ - get_numeric_gradient(forward_op, input_vars, output_name, name) + get_numeric_gradient( + forward_op, input_vars, output_name, name, in_place=in_place) for name in inputs_to_check ] diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py index e7696844d5..861fe6cf89 100644 --- a/python/paddle/v2/framework/tests/test_scatter_op.py +++ b/python/paddle/v2/framework/tests/test_scatter_op.py @@ -31,7 +31,8 @@ class TestScatterGradOp(GradientChecker): output_np[index_np] += updates_np inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} # check gradient - self.check_grad(op, inputs, set(["Updates", "Ref"]), "Out") + self.check_grad( + op, inputs, set(["Updates", "Ref"]), "Out", in_place=True) if __name__ == "__main__": From f22ece9273b54f1a248f7a787e252eb04a5acea3 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 24 Aug 2017 19:44:19 -0700 Subject: [PATCH 102/170] Add a document on building using Docker --- Dockerfile | 4 +- doc/howto/dev/build_en.md | 83 ++++++++++++++++++++++++++++++++++ paddle/scripts/docker/build.sh | 6 +-- 3 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 doc/howto/dev/build_en.md diff --git a/Dockerfile b/Dockerfile index 98f61ba586..136db772cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,13 +10,11 @@ RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ub ARG WITH_GPU ARG WITH_AVX ARG WITH_DOC -ARG WITH_STYLE_CHECK ENV WOBOQ OFF -ENV WITH_GPU=${WITH_GPU:-OFF} +ENV WITH_GPU=${WITH_GPU:-ON} ENV WITH_AVX=${WITH_AVX:-ON} ENV WITH_DOC=${WITH_DOC:-OFF} -ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} ENV HOME /root # Add bash enhancements diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md new file mode 100644 index 0000000000..80488a147d --- /dev/null +++ b/doc/howto/dev/build_en.md @@ -0,0 +1,83 @@ +# Build PaddlePaddle from Source Code and Run Unit Test + +## What Developers Need + +To contribute to PaddlePaddle, you need + +1. A computer -- Linux, BSD, Windows, MacOS, and +1. Docker. + +Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. + +## General Process + +1. Retrieve source code. + + ```bash + git clone https://github.com/paddlepaddle/paddle + ``` + +2. Install build tools. + + ```bash + cd paddle; docker build -t paddle:dev . + ``` + +3. Build from source. + + ```bash + docker run -v $PWD:/paddle paddle:dev + ``` + +4. Run unit tests. + + ```bash + docker run -v $PWD:/paddle paddle:dev "cd/build; ctest" + ``` + + +## Docker, Or Not? + +- What is Docker? 
+ + If you haven't heard of it, consider it something like Python's virtualenv. + +- Docker or virtual machine? + + Some people compare Docker with VMs, but Docker doesn't virtualize any hardware, and it doesn't run a guest OS. + +- Why Docker? + + Using a Docker image of build tools standardize the building environment, and easier for others to reproduce your problem, if there is any, and help. + + Also, some build tools don't run on Windows or Mac or BSD, but Docker runs almost everywhere, so developers can use whatever computer they want. + +- Can I don't use Docker? + + Sure, you don't have to install build tools into a Docker image; instead, you can install them onto your local computer. This document exists because Docker would make the development way easier. + +- How difficult is it to learn Docker? + + It takes you ten minutes to read https://docs.docker.com/get-started/ and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. + +- Docker requires sudo + + An owner of a computer has the administrative privilege, a.k.a., sudo. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo. + +- Can I use my favorite IDE? + + Yes, of course. The source code resides on your local computer, and you can edit it using whatever editor you like. + + Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file: + + ```emacs + (global-set-key "\C-cc" 'compile) + (setq compile-command + "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") + ``` + + so they could type `Ctrl-C` and `c` to build PaddlePaddle from source. + +- How many parallel building processes does the Docker container run? + + Our building Docker image runs a Bash script https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh, which calls `make -j$(nproc)` to starts as many processes as the number of your processors. diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 2941662f34..7bab814ae8 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -38,7 +38,7 @@ Configuring cmake in /paddle/build ... -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} -DCUDNN_ROOT=/usr/ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} - -DWITH_TESTING=${WITH_TESTING:-OFF} + -DWITH_TESTING=${WITH_TESTING:-ON} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ======================================== EOF @@ -56,8 +56,8 @@ cmake .. \ -DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DCUDNN_ROOT=/usr/ \ - -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ - -DWITH_TESTING=${WITH_TESTING:-OFF} \ + -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \ + -DWITH_TESTING=${WITH_TESTING:-ON} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON cat < Date: Fri, 25 Aug 2017 11:36:38 +0800 Subject: [PATCH 103/170] Neon depthwise conv with filterSize = 3 and stride = 2. 
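For orientation, the NEON kernel added below computes an ordinary depthwise convolution with a 3x3 filter and stride 2; the vld2q_f32/vextq_f32 pair only gathers the even, odd, and shifted-even input columns so that four output positions can be accumulated per iteration. The scalar sketch here is not part of the patch: it assumes, as the surrounding NeonDepthwiseConv code does, that the input has already been zero-padded and that output channel c reads input channel c / filterMultiplier; the helper name is made up for illustration.

```cpp
// Illustrative scalar reference for the vectorized 3x3, stride-2 kernel.
// Assumes "input" is already zero-padded; not part of the patch itself.
void depthwiseConv3x3Stride2Reference(const float* input,
                                      const float* filter,
                                      int inputHeight,
                                      int inputWidth,
                                      int outputChannels,
                                      int outputHeight,
                                      int outputWidth,
                                      int filterMultiplier,
                                      float* output) {
  for (int c = 0; c < outputChannels; ++c) {
    // Each output channel has its own 3x3 filter and maps back to one
    // input channel, the same layout DepthwiseConvKernel<3, 2> uses.
    const float* in = input + (c / filterMultiplier) * inputHeight * inputWidth;
    const float* k = filter + c * 9;
    for (int h = 0; h < outputHeight; ++h) {
      for (int w = 0; w < outputWidth; ++w) {
        float sum = 0.f;
        for (int kh = 0; kh < 3; ++kh) {
          for (int kw = 0; kw < 3; ++kw) {
            // Stride 2: output (h, w) reads input rows 2h..2h+2 and
            // columns 2w..2w+2.
            sum += in[(2 * h + kh) * inputWidth + 2 * w + kw] * k[kh * 3 + kw];
          }
        }
        *output++ = sum;  // outputs are written channel by channel, row-major
      }
    }
  }
}
```

The vectorized version in this commit produces the same values; vld2q_f32 de-interleaves eight consecutive floats into even and odd lanes, which is exactly the column pattern a stride-2 kernel needs, so four values of w are processed per loop iteration.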
--- paddle/function/neon/NeonDepthwiseConv.cpp | 115 ++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index c017241c92..53d14d9833 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -153,6 +153,109 @@ struct DepthwiseConvKernel<3, 1> { } }; +/** + * Each step calculates four elements of the output. + * First step: + * R0[0, 2, 4, 6...] * K[0][0] + * R0[1, 3, 5, 7...] * K[0][1] + * R0[2, 4, 6, 8...] * K[0][2] + * R1[0, 2, 4, 6...] * K[1][0] + * R1[1, 3, 5, 7...] * K[1][1] + * R1[2, 4, 6, 8...] * K[1][2] + * R2[0, 2, 4, 6...] * K[2][0] + * R2[1, 3, 5, 7...] * K[2][1] + * R2[2, 4, 6, 8...] * K[2][2] + * ------------------------------ + * Output[0, 1, 2, 3] + */ +template <> +struct DepthwiseConvKernel<3, 2> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 9) { + // Load the filters + float32x4_t k[3]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 3); + k[2] = vld1q_f32(filterData + 6); + k[0] = vsetq_lane_f32(0.f, k[0], 3); + k[1] = vsetq_lane_f32(0.f, k[1], 3); + k[2] = vsetq_lane_f32(0.f, k[2], 3); + + const float* start = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + float32x4_t input[3][3]; + for (int h = 0; h < outputHeight; h++) { + const float* r0 = start + 2 * h * inputWidth; + const float* r1 = start + (2 * h + 1) * inputWidth; + const float* r2 = start + (2 * h + 2) * inputWidth; + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t data1; + float32x4x2_t data2; + + data2 = vld2q_f32(r0); + input[0][0] = data2.val[0]; + input[0][1] = data2.val[1]; + data1 = vld1q_f32(r0 + 8); + input[0][2] = vextq_f32(data2.val[0], data1, 1); + + data2 = vld2q_f32(r1); + input[1][0] = data2.val[0]; + input[1][1] = data2.val[1]; + data1 = vld1q_f32(r1 + 8); + input[1][2] = vextq_f32(data2.val[0], data1, 1); + + data2 = vld2q_f32(r2); + input[2][0] = data2.val[0]; + input[2][1] = data2.val[1]; + data1 = vld1q_f32(r2 + 8); + input[2][2] = vextq_f32(data2.val[0], data1, 1); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 8; + r1 += 8; + r2 += 8; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); + r0 += 2; + r1 += 2; + r2 += 2; + outputData++; + } + } + } + } +}; + /** * Each step calculates four elements of the output. 
*/ @@ -326,7 +429,7 @@ public: } for (size_t i = 0; i < batchSize; i++) { - if (filterWidth == 3) { + if (filterWidth == 3 && strideH() == 1) { DepthwiseConvKernel<3, 1>::run(inputPadding, filterData, inputHeight, @@ -336,6 +439,16 @@ public: outputWidth, filterMultiplier, outputData); + } else if (filterWidth == 3 && strideH() == 2) { + DepthwiseConvKernel<3, 2>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); } else if (filterWidth == 4) { DepthwiseConvKernel<4, 1>::run(inputPadding, filterData, From 9fdf3970d0de568db4a9a3b757335604430ca137 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 24 Aug 2017 20:37:39 -0700 Subject: [PATCH 104/170] Update unit test running and CUDA --- doc/howto/dev/build_en.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 80488a147d..de0733f963 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -29,12 +29,25 @@ Nothing else. Not even Python and GCC, because you can install all build tools docker run -v $PWD:/paddle paddle:dev ``` + This builds a CUDA-enabled version and writes all binary outputs to directory `./build` of the local computer, other than the Docker container. If we want to build only the CPU part, we can type + + ```bash + docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + ``` + 4. Run unit tests. + To run all unit tests using the first GPU of a node: + ```bash - docker run -v $PWD:/paddle paddle:dev "cd/build; ctest" + NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` + If we used `WITH_GPU=OFF` at build time, it generates only CPU-based unit tests, and we don't need nvidia-docker to run them. We can just run + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` ## Docker, Or Not? From f00c4112d2ca1d42c60d154002b2347ba2de5cd9 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 25 Aug 2017 11:53:45 +0800 Subject: [PATCH 105/170] Neon depthwise conv with filterSize = 4 and stride = 2. --- paddle/function/neon/NeonDepthwiseConv.cpp | 122 ++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 53d14d9833..3fe28b1de3 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -364,6 +364,116 @@ struct DepthwiseConvKernel<4, 1> { } }; +/** + * Each step calculates four elements of the output. 
+ */ +template <> +struct DepthwiseConvKernel<4, 2> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 16) { + // Load the filters + float32x4_t k[4]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 4); + k[2] = vld1q_f32(filterData + 8); + k[3] = vld1q_f32(filterData + 12); + + const float* start = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + float32x4_t input[4][4]; + for (int h = 0; h < outputHeight; h++) { + const float* r0 = start + 2 * h * inputWidth; + const float* r1 = start + (2 * h + 1) * inputWidth; + const float* r2 = start + (2 * h + 2) * inputWidth; + const float* r3 = start + (2 * h + 3) * inputWidth; + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4x2_t data1; + float32x4x2_t data2; + + data1 = vld2q_f32(r0); + data2 = vld2q_f32(r0 + 8); + input[0][0] = data1.val[0]; + input[0][1] = data1.val[1]; + input[0][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[0][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r1); + data2 = vld2q_f32(r1 + 8); + input[1][0] = data1.val[0]; + input[1][1] = data1.val[1]; + input[1][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[1][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r2); + data2 = vld2q_f32(r2 + 8); + input[2][0] = data1.val[0]; + input[2][1] = data1.val[1]; + input[2][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[2][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r3); + data2 = vld2q_f32(r3 + 8); + input[3][0] = data1.val[0]; + input[3][1] = data1.val[1]; + input[3][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[3][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 8; + r1 += 8; + r2 += 8; + r3 += 8; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + float32x4_t i3 = vld1q_f32(r3); + *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); + r0 += 2; + r1 += 2; + r2 += 2; + r3 += 2; + outputData++; + } + } + } + } +}; + template class NeonDepthwiseConvFunction : public ConvFunctionBase { public: @@ -449,7 +559,7 @@ public: outputWidth, filterMultiplier, outputData); - } else if 
(filterWidth == 4) { + } else if (filterWidth == 4 && strideH() == 1) { DepthwiseConvKernel<4, 1>::run(inputPadding, filterData, inputHeight, @@ -459,6 +569,16 @@ public: outputWidth, filterMultiplier, outputData); + } else if (filterWidth == 4 && strideH() == 2) { + DepthwiseConvKernel<4, 2>::run(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); } inputPadding += inputChannels * inputHeight * inputWidth; From 06fad3fe9deccdc8ee4721ff028753f53c7ab87f Mon Sep 17 00:00:00 2001 From: gongweibao Date: Fri, 25 Aug 2017 12:01:57 +0800 Subject: [PATCH 106/170] hidden capi symbols (#3636) hidden capi symbols --- paddle/capi/CMakeLists.txt | 3 +++ paddle/capi/export.map | 6 ++++++ paddle/capi/export.sym | 0 3 files changed, 9 insertions(+) create mode 100644 paddle/capi/export.map create mode 100644 paddle/capi/export.sym diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 11022d1754..dde99ab340 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -53,7 +53,10 @@ add_custom_target(paddle_capi_whole ALL set_target_properties(paddle_capi_whole PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library}) +set(LINK_FLAGS " -Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/export.sym -Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/export.map") +# TODO: merge mkl into paddle_capi_shared add_library(paddle_capi_shared SHARED ${CAPI_SOURCES}) +set_target_properties(paddle_capi_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}") target_include_directories(paddle_capi_shared PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) link_paddle_exe(paddle_capi_shared) diff --git a/paddle/capi/export.map b/paddle/capi/export.map new file mode 100644 index 0000000000..8d673f675d --- /dev/null +++ b/paddle/capi/export.map @@ -0,0 +1,6 @@ +{ + global: + paddle_*; + local: + *; +}; diff --git a/paddle/capi/export.sym b/paddle/capi/export.sym new file mode 100644 index 0000000000..e69de29bb2 From 1e61d91f24e9213ab43edc62cf2c6f9e47a62d1f Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 24 Aug 2017 21:38:13 -0700 Subject: [PATCH 107/170] Update index and add Chinese version --- doc/howto/dev/build_cn.md | 100 ++++++++++++++++++++++++++++++++++++++ doc/howto/dev/build_en.md | 6 ++- doc/howto/index_cn.rst | 1 + doc/howto/index_en.rst | 1 + 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 doc/howto/dev/build_cn.md diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md new file mode 100644 index 0000000000..dc372de9fa --- /dev/null +++ b/doc/howto/dev/build_cn.md @@ -0,0 +1,100 @@ +# 编译PaddlePaddle和运行单元测试 + +## 需要的软硬件 + +为了开发PaddlePaddle,我们需要 + +1. 一台电脑,可以装的是 Linux, BSD, Windows 或者 MacOS 操作系统,以及 +1. Docker。 + +不需要其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。 + +## 总体流程 + +1. 获取源码 + + ```bash + git clone https://github.com/paddlepaddle/paddle + ``` + +2. 安装工具 + + ```bash + cd paddle; docker build -t paddle:dev . + ``` + +3. 编译 + + ```bash + docker run -v $PWD:/paddle paddle:dev + ``` + + 这个命令编译出一个 CUDA-enabled 版本。所有二进制文件会被写到本机的 `./build` 目录,而不是写到 Docker container 里。如果我们只需要编译一个只支持 CPU 的版本,可以用 + + ```bash + docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + ``` + +4. 
运行单元测试 + + 用本机的第一个 GPU 来运行包括 GPU 单元测试在内的所有单元测试: + + ```bash + NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + + 如果编译的时候我们用了 `WITH_GPU=OFF` 选项,那么编译过程只会产生 CPU-based 单元测试,那么我们也就不需要 nvidia-docker 来运行单元测试了。我们只需要: + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + +## 为什么要 Docker 呀? + +- 什么是 Docker? + + 如果您没有听说 Docker,可以把它想象为一个类似 virtualenv 的系统,但是虚拟的不仅仅是 Python 的运行环境。 + +- Docker 还是虚拟机? + + 有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行基本一样。 + +- 为什么用 Docker? + + 把工具和配置都安装在一个 Docker image 里可以标准化编译环境。这样如果遇到问题,其他人可以复现问题以便帮助。 + + 另外,对于习惯使用Windows和MacOS的开发者来说,使用Docker就不用配置交叉编译环境了。 + +- 我可以选择不用Docker吗? + + 当然可以。大家可以用把开发工具安装进入 Docker image 一样的方式,把这些工具安装到本机。这篇文档介绍基于 Docker 的开发流程,是因为这个流程比其他方法都更简便。 + +- 学习 Docker 有多难? + + 理解 Docker 并不难,大概花十分钟看一遍 https://zhuanlan.zhihu.com/p/19902938 即可。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 + +- Docker 需要 sudo + + 如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。 + +- 我可以用 IDE 吗? + + 当然可以,因为源码就在本机上。IDE 默认调用 make 之类的程序来编译源码,我们只需要配置 IDE 来调用 Docker 命令编译源码即可。 + + 很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行 + + ```emacs + (global-set-key "\C-cc" 'compile) + (setq compile-command + "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") + ``` + + 就可以按 `Ctrl-C` 和 `c` 键来启动编译了。 + +- 可以并行编译吗? + + 是的。我们的 Docker image 运行一个 Bash 脚本 https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh 。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 + +- Docker on Windows/MacOS? + + Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考 https://github.com/PaddlePaddle/Paddle/issues/627 。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index de0733f963..640d126018 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -91,6 +91,10 @@ Nothing else. Not even Python and GCC, because you can install all build tools so they could type `Ctrl-C` and `c` to build PaddlePaddle from source. -- How many parallel building processes does the Docker container run? +- Does Docker do parallel building? Our building Docker image runs a Bash script https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh, which calls `make -j$(nproc)` to starts as many processes as the number of your processors. + +- Docker on Windows/MacOS? + + On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to https://github.com/PaddlePaddle/Paddle/issues/627 for details. diff --git a/doc/howto/index_cn.rst b/doc/howto/index_cn.rst index 26449a6365..0608aa3096 100644 --- a/doc/howto/index_cn.rst +++ b/doc/howto/index_cn.rst @@ -19,6 +19,7 @@ .. toctree:: :maxdepth: 1 + dev/build_cn.rst dev/write_docs_cn.rst dev/contribute_to_paddle_cn.md diff --git a/doc/howto/index_en.rst b/doc/howto/index_en.rst index 1fbfcd260b..1b6034be4e 100644 --- a/doc/howto/index_en.rst +++ b/doc/howto/index_en.rst @@ -18,6 +18,7 @@ Development .. toctree:: :maxdepth: 1 + dev/build_en.rst dev/new_layer_en.rst dev/contribute_to_paddle_en.md From 7a42c92d49cbcf05bb7c8fc698b923a09503d22e Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 24 Aug 2017 10:53:00 +0800 Subject: [PATCH 108/170] fix a bug that memory does not clean. 
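The patch below swaps `zero()` for `zeroMem()` and adds an explicit clear of `expandedPathScores_[i]`: the matrices involved are obtained through `Matrix::resizeOrCreate` and reused across forward passes, so without clearing them first, values left over from the previous batch survive into the current one. The hazard is generic; a minimal standalone C++ sketch (illustrative code, not Paddle's Matrix API):

```cpp
// Sketch: a buffer reused across calls must be cleared before values are
// accumulated into it, or results from the previous call leak into this one.
#include <algorithm>
#include <cstdio>
#include <vector>

void accumulate(std::vector<float>& buf, const std::vector<float>& in) {
  if (buf.size() < in.size()) buf.resize(in.size());  // reuse when big enough
  std::fill(buf.begin(), buf.end(), 0.f);  // the "zeroMem()" step of the patch
  for (size_t i = 0; i < in.size(); ++i) buf[i] += in[i];
}

int main() {
  std::vector<float> buf;
  accumulate(buf, {1.f, 2.f, 3.f});
  accumulate(buf, {4.f, 5.f});  // with the fill: {4, 5, 0}; without: {5, 7, 3}
  for (float v : buf) std::printf("%g ", v);
  return 0;
}
```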
--- .../gserver/layers/CrossEntropyOverBeam.cpp | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index f7736f0ce9..b7c2a44626 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -53,8 +53,8 @@ size_t CostForOneSequence::initLastExpansion() { candidates->getData() + height * beamSize_, [](const real& val) { return val != -1; }); /* - * if the gold sequence falls off the beam during search, - * add the gold sequence as the last path into all expanded paths. + * if the gold sequence falls off the beam during search, add the gold + * sequence as the last path into the all expanded candidates. */ if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; @@ -133,7 +133,7 @@ real CostForOneSequence::globallyNormalizedScore() { Matrix::resizeOrCreate( softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); - softmaxOut_->zero(); + softmaxOut_->zeroMem(); MatrixPtr tmp = Matrix::create( softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); @@ -143,6 +143,8 @@ real CostForOneSequence::globallyNormalizedScore() { 1, false, false); + expandedPathScores_[i]->zeroMem(); + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), pathRowIdsInEachBeam_[i].size(), false); @@ -217,13 +219,16 @@ void CrossEntropyOverBeam::checkInputs() { const Argument& goldSeq = getInput(i * 3 + 2); if (i) { - CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, " - "should be a nested sequence"; + CHECK(scores.hasSubseq()) << "input " << i << " " + << inputLayers_[i * 3]->getName() + << " should be a nested sequence"; CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); CHECK_EQ(scores.getNumSequences(), batchSize_); CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); } else { - CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence"; + CHECK(scores.hasSeq()) << "input " << i << " " + << inputLayers_[i]->getName() + << " should be a sequence"; batchSize_ = scores.getNumSequences(); beamSize_ = getInputValue(i * 3 + 1)->getWidth(); CHECK_EQ(batchSize_, selCandidates.getBatchSize()); @@ -332,7 +337,7 @@ void CrossEntropyOverBeam::splitBatchBeams() { void CrossEntropyOverBeam::resizeOutput() { Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); - output_.value->zero(); + output_.value->zeroMem(); for (size_t i = 0; i < beamExpanCount_; ++i) { MatrixPtr inGrad = getInputGrad(i * 3); @@ -344,7 +349,7 @@ void CrossEntropyOverBeam::resizeOutput() { false); } else candidateScoreGrad_[i] = std::move(inGrad); - candidateScoreGrad_[i]->zero(); + candidateScoreGrad_[i]->zeroMem(); } } From 818a64f41ffacca0d3ff07928a19ac47021ccac1 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 25 Aug 2017 13:56:55 +0800 Subject: [PATCH 109/170] Fix img_pool_layer bug. 
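The bug fixed below is purely an ordering problem: the type assertion on `pool_type` ran before the `None` default was applied, so calling the layer without an explicit `pool_type` tripped the check even though `None` is meant to fall back to max pooling. A stripped-down Python sketch of the corrected order (hypothetical classes, not the real trainer_config_helpers code):

```python
# Validate pool_type only after defaults/normalization have been applied.
class MaxPooling(object):
    name = 'max'

class AvgPooling(object):
    name = 'avg'

def img_pool_layer_sketch(pool_type=None):
    # 1. Apply the default first ...
    if pool_type is None:
        pool_type = MaxPooling()
    elif isinstance(pool_type, AvgPooling):
        pool_type.name = 'avg'
    # 2. ... then check the (possibly defaulted) value.
    assert type(pool_type) in [AvgPooling, MaxPooling], \
        "only AvgPooling and MaxPooling are supported in this sketch"
    return pool_type.name + '-projection'

print(img_pool_layer_sketch())              # 'max-projection'
print(img_pool_layer_sketch(AvgPooling()))  # 'avg-projection'
```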
--- python/paddle/trainer_config_helpers/layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index f323b017c0..862265f2cd 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2607,15 +2607,15 @@ def img_pool_layer(input, assert input.num_filters is not None num_channels = input.num_filters - assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, - CudnnMaxPooling], \ - "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" - if pool_type is None: pool_type = MaxPooling() elif isinstance(pool_type, AvgPooling): pool_type.name = 'avg' + assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, + CudnnMaxPooling], \ + "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" + type_name = pool_type.name + '-projection' \ if ( isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ From aa28d046fb828814b9849aa1ebfc868be2db98f9 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 25 Aug 2017 14:11:36 +0800 Subject: [PATCH 110/170] fix a bug of sequence_slice layer when batch_size=1 --- paddle/gserver/layers/SequenceSliceLayer.cpp | 18 ++++++++++-------- .../gserver/tests/test_SeqSliceLayerGrad.cpp | 4 +++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 5d72d37304..aab44c4646 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -130,6 +130,8 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, CHECK(starts || ends) << "At least one of the start or end indices " << "should be given."; + bool hasSubseq = getInput(0).hasSubseq(); + outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); selectedRows_.clear(); @@ -151,14 +153,13 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, int seqLen = endPos - begPos + 1; CHECK_GT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); - inputSeqInfoVec_.size() > 1 + hasSubseq ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen); } rowIdx++; } - if (inputSeqInfoVec_.size() > 1) - outSeqStartPos_.push_back(outSubSeqStartPos_.back()); + if (hasSubseq) outSeqStartPos_.push_back(outSubSeqStartPos_.back()); } if (useGpu_) { @@ -175,7 +176,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, output_.sequenceStartPositions->copyFrom( outSeqStartPos_.data(), outSeqStartPos_.size(), false); - if (inputSeqInfoVec_.size() > 1) { + if (hasSubseq) { ICpuGpuVector::resizeOrCreate( output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false); output_.subSequenceStartPositions->copyFrom( @@ -203,10 +204,11 @@ void SequenceSliceLayer::forward(PassType passType) { } else copySliceIdsToCpu(); - // calculate the selected row indices in a batch, - // and build the output sequence information. - calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr, - endIdsOnCpu_ ? endIdsOnCpu_ : nullptr); + /* + * calculate the selected row indices in a batch, and build the output + * sequence information. 
+ */ + calSelectedRows(startIdsOnCpu_, endIdsOnCpu_); resetOutput(selectedRows_.size(), getSize()); diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp index d560ca650b..e1d4ae1617 100644 --- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -30,6 +30,8 @@ const int MAX_SEQ_NUM = 17; const int MAX_SEQ_LEN = 23; const int MAX_BEAM_SIZE = 13; +const size_t SEED = (size_t)(time(NULL)); + vector randSampling(real range, int n) { CHECK_GE(range, n); vector num(range); @@ -46,7 +48,7 @@ void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { seqStartPos.resize(1, 0); subSeqStartPos.resize(1, 0); - srand((size_t)(time(NULL))); + srand(SEED); int seqNum = 1 + (rand() % MAX_SEQ_NUM); for (int i = 0; i < seqNum; ++i) { int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); From 4cc57836f393ada9b65cfeef444662afc34f1109 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 25 Aug 2017 17:20:28 +0800 Subject: [PATCH 111/170] enable reorder --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 39 +++++------------ paddle/math/MKLDNNMatrix.cpp | 57 +++++++++++++++++++++++++ paddle/math/MKLDNNMatrix.h | 33 ++++++++++++-- 3 files changed, 97 insertions(+), 32 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a5555c4618..ad50c15a7d 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -61,39 +61,20 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() { return; } - // TODO(TJ): dst format should get from wgtVal_ - int dstFmt = PARAM_FORMAT_MKLDNN_OI; - int srcFmt = weight_->getParameterPtr()->getHeaderFormat(); - if (srcFmt == dstFmt) { - return; - } - - // The weight_ is transposed from initial paddle weight - MatrixPtr paddleWgt = Matrix::create( - weight_->getW()->getData(), iLayerSize_, oc_, false, false); - - // TODO(TJ): remove this print when do not need differ weights - std::ostringstream ostr; - paddleWgt->print(ostr); - VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); - - // The mkldnn weight is transposed from initial paddle matrix - MatrixPtr paddleWgtT; - paddleWgt->transpose(paddleWgtT, true); - weight_->getW()->copyFrom(*paddleWgtT); - weight_->getParameterPtr()->setHeaderFormat(dstFmt); + CHECK(wgtVal_) << "should have been initialized"; + bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; + auto targetDim = wgtVal_->getDims(); + auto srcFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo; + wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); hasInitedWgt_ = true; } void MKLDNNFcLayer::convertWeightsToPaddle() { - MatrixPtr dnnWgt = weight_->getW(); - MatrixPtr paddleWgt; - dnnWgt->transpose(paddleWgt, true); - - // copy paddle weight and override on weight_ - MatrixPtr dnnWgtT = Matrix::create( - dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); - dnnWgtT->copyFrom(*paddleWgt); + CHECK(wgtVal_) << "should have been initialized"; + bool hasNoSpatial_ = ih_ == 1 && iw_ == 1; + auto targetDim = wgtVal_->getDims(); + auto dstFmt = hasNoSpatial_ ? 
memory::format::io : memory::format::ihwo; + wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } void MKLDNNFcLayer::reshape() { diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 94df9c1550..32ae3b1bcf 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -56,6 +56,63 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, return create(m, pd); } +void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, + memory::format srcFmt, + memory::dims targetDim) { + memory::format dstFmt = getFormat(); + if (srcFmt == dstFmt) { + return; + } + CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; + real* srcData = getData(); + real* dstData = m->getData(); + reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); +} + +void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, + memory::format dstFmt, + memory::dims targetDim) { + memory::format srcFmt = getFormat(); + if (srcFmt == dstFmt) { + return; + } + CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; + real* srcData = getData(); + real* dstData = m->getData(); + reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); +} + +void MKLDNNMatrix::reorderOnce(void* srcData, + void* dstData, + memory::format srcFmt, + memory::format dstFmt, + memory::dims dm) { + CHECK(srcData); + CHECK(dstData); + MatrixPtr tmpSrc; + if (dstData == srcData) { + // inplace data + size_t sz = 1; + for (size_t i = 0; i < dm.size(); ++i) { + sz *= dm[i]; + } + tmpSrc = Matrix::create(sz, 1, false, false); + tmpSrc->copyFrom((real*)srcData, sz); + srcData = tmpSrc->getData(); + } + + auto dtype = this->getDtype(); + auto srcMD = memory::desc(dm, dtype, srcFmt); + auto dstMD = memory::desc(dm, dtype, dstFmt); + + auto eg = this->getEngine(); + auto src = memory(memory::primitive_desc(srcMD, eg), srcData); + auto dst = memory(memory::primitive_desc(dstMD, eg), dstData); + + auto r = reorder(src, dst); + stream(stream::kind::eager).submit({r}).wait(); +} + void MKLDNNMatrix::downSpatial() { int fmt = getFormat(); if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) { diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 05adc867c2..ea3fd7d461 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -21,9 +21,6 @@ limitations under the License. */ namespace paddle { -static const std::map PARAM_FOARMAT_MAP = - {{mkldnn::memory::format::oi, PARAM_FORMAT_MKLDNN_OI}}; - class MKLDNNMatrix; typedef std::shared_ptr MKLDNNMatrixPtr; @@ -57,6 +54,26 @@ public: mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); public: + /** + * Reorder this MKLDNNMatrix from other format. + * Support inplace reorder + * Pay attention: this function would only reorder the data layout. + * will NOT change this original dim or format info + */ + void reorderDataFrom(const MKLDNNMatrixPtr& m, + memory::format srcFmt, + memory::dims targetDim); + + /** + * Reorder this MKLDNNMatrix to other format. + * Support inplace reorder + * Pay attention: this function would only reorder the data layout. + * will NOT change the dst dim or format info + */ + void reorderDataTo(const MKLDNNMatrixPtr& m, + memory::format dstFmt, + memory::dims targetDim); + /** * Dimensionality reduction. * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1 @@ -113,6 +130,16 @@ public: * Get engine. */ mkldnn::engine getEngine() { return getPD().get_engine(); } + +protected: + /** + * Do once reorder supported inplace. 
+ */ + void reorderOnce(void* srcData, + void* dstData, + memory::format srcFmt, + memory::format dstFmt, + memory::dims dm); }; } // namespace paddle From 7035bb63e91a2dcf1f91df5e440d2c3e45bdd2e8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 25 Aug 2017 20:44:04 +0800 Subject: [PATCH 112/170] fix a bug. --- paddle/parameter/Argument.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 2b945de18a..b0e9e740c8 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -677,6 +677,7 @@ void Argument::reorganizeSeqInfo( const ICpuGpuVectorPtr subSeqStartPos, std::vector>& reorganizedSeqInfo) { CHECK(seqStartPos); + reorganizedSeqInfo.clear(); int seqNum = seqStartPos->getSize() - 1; int* seqStarts = seqStartPos->getMutableData(false); From c8d0c9af865cd0ac47d1cd7461c24793d833eeff Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 11:24:48 -0700 Subject: [PATCH 113/170] In response to comments from Luo Tao --- doc/howto/dev/build_cn.md | 6 +++--- doc/howto/dev/build_en.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index dc372de9fa..7c95579636 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -71,7 +71,7 @@ - 学习 Docker 有多难? - 理解 Docker 并不难,大概花十分钟看一遍 https://zhuanlan.zhihu.com/p/19902938 即可。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 + 理解 Docker 并不难,大概花十分钟看一下[这篇文章](https://zhuanlan.zhihu.com/p/19902938)。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 - Docker 需要 sudo @@ -93,8 +93,8 @@ - 可以并行编译吗? - 是的。我们的 Docker image 运行一个 Bash 脚本 https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh 。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 + 是的。我们的 Docker image 运行一个 [Bash 脚本](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 - Docker on Windows/MacOS? - Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考 https://github.com/PaddlePaddle/Paddle/issues/627 。 + Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 640d126018..3be2405ea7 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -71,7 +71,7 @@ Nothing else. Not even Python and GCC, because you can install all build tools - How difficult is it to learn Docker? - It takes you ten minutes to read https://docs.docker.com/get-started/ and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. + It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. - Docker requires sudo @@ -93,8 +93,8 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Does Docker do parallel building? 
- Our building Docker image runs a Bash script https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh, which calls `make -j$(nproc)` to starts as many processes as the number of your processors. + Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your processors. - Docker on Windows/MacOS? - On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to https://github.com/PaddlePaddle/Paddle/issues/627 for details. + On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details. From f71f3935e3ce05a8e90edc971f5ab08d71ed2966 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 11:51:53 -0700 Subject: [PATCH 114/170] In response to comments from Chen Xi --- doc/howto/dev/build_cn.md | 20 +++++++++++++------- doc/howto/dev/build_en.md | 34 ++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index 7c95579636..0077d90118 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -23,13 +23,17 @@ cd paddle; docker build -t paddle:dev . ``` + 请注意这个命令结尾处的 `.`;它表示 `docker build` 应该读取当前目录下的 [`Dockerfile`文件](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile),按照其内容创建一个名为 `paddle:dev` 的 Docker image,并且把各种开发工具安装进去。 + 3. 编译 + 以下命令启动一个 Docker container 来执行 `paddle:dev` 这个 Docker image,同时把当前目录(源码树根目录)映射为 container 里的 `/paddle` 目录,并且运行 `Dockerfile` 描述的默认入口程序 [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `cmake` 和 `make` 来编译 `/paddle` 里的源码,结果输出到 `/paddle/build`,也就是本地的源码树根目录里的 `build` 子目录。 + ```bash docker run -v $PWD:/paddle paddle:dev ``` - 这个命令编译出一个 CUDA-enabled 版本。所有二进制文件会被写到本机的 `./build` 目录,而不是写到 Docker container 里。如果我们只需要编译一个只支持 CPU 的版本,可以用 + 上述命令编译出一个 CUDA-enabled 版本。如果我们只需要编译一个只支持 CPU 的版本,可以用 ```bash docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev @@ -57,7 +61,7 @@ - Docker 还是虚拟机? - 有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行基本一样。 + 有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行一样。 - 为什么用 Docker? @@ -73,10 +77,6 @@ 理解 Docker 并不难,大概花十分钟看一下[这篇文章](https://zhuanlan.zhihu.com/p/19902938)。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 -- Docker 需要 sudo - - 如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。 - - 我可以用 IDE 吗? 当然可以,因为源码就在本机上。IDE 默认调用 make 之类的程序来编译源码,我们只需要配置 IDE 来调用 Docker 命令编译源码即可。 @@ -95,6 +95,12 @@ 是的。我们的 Docker image 运行一个 [Bash 脚本](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 -- Docker on Windows/MacOS? 
+## 可能碰到的问题 + +- Docker 需要 sudo + + 如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。 + +- 在 Windows/MacOS 上编译很慢 Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 3be2405ea7..95752beba0 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -7,7 +7,7 @@ To contribute to PaddlePaddle, you need 1. A computer -- Linux, BSD, Windows, MacOS, and 1. Docker. -Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. +Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. We run all the tools by running this image. ## General Process @@ -17,19 +17,23 @@ Nothing else. Not even Python and GCC, because you can install all build tools git clone https://github.com/paddlepaddle/paddle ``` -2. Install build tools. +2. Install build tools into a Docker image. ```bash cd paddle; docker build -t paddle:dev . ``` + Please be aware of the `.` at the end of the command, which refers to the [`./Dockerfile` file](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile). `docker build` follows instructions in this file to create a Docker image named `paddle:dev`, and installs building tools into it. + 3. Build from source. + This following command starts a Docker container that executes the Docker image `paddle:dev`, mapping the current directory to `/paddle/` in the container, and runs the default entry-point [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh) as specified in the Dockefile. `build.sh` invokes `cmake` and `make` to build PaddlePaddle source code, which had been mapped to `/paddle`, and writes outputs to `/paddle/build`, which maps to `build` in the current source directory on the computer. + ```bash docker run -v $PWD:/paddle paddle:dev ``` - This builds a CUDA-enabled version and writes all binary outputs to directory `./build` of the local computer, other than the Docker container. If we want to build only the CPU part, we can type + Above command builds a CUDA-enabled version. If we want to build a CPU-only version, we can type ```bash docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev @@ -57,25 +61,21 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Docker or virtual machine? - Some people compare Docker with VMs, but Docker doesn't virtualize any hardware, and it doesn't run a guest OS. + Some people compare Docker with VMs, but Docker doesn't virtualize any hardware nor running a guest OS, which means there is no compromise on the performance. - Why Docker? - Using a Docker image of build tools standardize the building environment, and easier for others to reproduce your problem, if there is any, and help. + Using a Docker image of build tools standardizes the building environment, which makes it easier for others to reproduce your problems and to help. Also, some build tools don't run on Windows or Mac or BSD, but Docker runs almost everywhere, so developers can use whatever computer they want. -- Can I don't use Docker? +- Can I choose not to use Docker? - Sure, you don't have to install build tools into a Docker image; instead, you can install them onto your local computer. This document exists because Docker would make the development way easier. 
+ Sure, you don't have to install build tools into a Docker image; instead, you can install them in your local computer. This document exists because Docker would make the development way easier. - How difficult is it to learn Docker? - It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, and upgrade them when new versions of PaddlePaddle require some new tools. - -- Docker requires sudo - - An owner of a computer has the administrative privilege, a.k.a., sudo. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo. + It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, especially when new versions of PaddlePaddle require some new tools. Not even to mention the time saved when other people trying to reproduce the issue you have. - Can I use my favorite IDE? @@ -93,8 +93,14 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Does Docker do parallel building? - Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your processors. + Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your CPU cores. + +## Some Gotchas + +- Docker requires sudo + + An owner of a computer has the administrative privilege, a.k.a., sudo, and Docker requires this privilege to work properly. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo. -- Docker on Windows/MacOS? +- Docker on Windows/MacOS builds slowly On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details. From 4b0235c1f2792cdecfe7d8f3e0bb1d0c57c6f361 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 14:31:02 -0700 Subject: [PATCH 115/170] Update build.sh --- paddle/scripts/docker/build.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 7bab814ae8..1798642022 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -63,12 +63,11 @@ cmake .. \ cat < Date: Fri, 25 Aug 2017 14:43:29 -0700 Subject: [PATCH 116/170] Run a specific test --- doc/howto/dev/build_cn.md | 6 ++++++ doc/howto/dev/build_en.md | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index 0077d90118..79b4ff9d5a 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -53,6 +53,12 @@ docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` + 有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以 + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + ## 为什么要 Docker 呀? 
- 什么是 Docker? diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 95752beba0..e1b55929f9 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -53,6 +53,12 @@ Nothing else. Not even Python and GCC, because you can install all build tools docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` + Sometimes we want to run a specific unit test, say `memory_test`, we can run + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + ## Docker, Or Not? - What is Docker? From 97649bf9b251707803b2665dedf1ef8f929d8c88 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 25 Aug 2017 22:08:24 +0000 Subject: [PATCH 117/170] fix codes in scatter --- paddle/operators/scatter_op.cc | 26 +++++++++++++------ paddle/operators/scatter_op.h | 6 ++--- .../v2/framework/tests/gradient_checker.py | 13 +++++----- .../v2/framework/tests/test_scatter_op.py | 1 - 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index cf01ef6279..f901edefa2 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -24,8 +24,18 @@ class ScatterOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - framework::DDim output_dims(ctx.Input("Ref")->dims()); - ctx.Output("Out")->Resize(output_dims); + PADDLE_ENFORCE_EQ(ctx.Input("Index")->dims().size(), 1, + "Update Index should be 1-D."); + PADDLE_ENFORCE_EQ(ctx.Input("Ref")->dims().size(), + ctx.Input("Updates")->dims().size(), + "Reference and Updates should have the same shape size"); + PADDLE_ENFORCE_EQ(ctx.Input("Updates")->dims()[0], + ctx.Input("Index")->dims()[0], + "Updates and Index should have same batch-size."); + framework::DDim data_dim(ctx.Input("Updates")->dims()); + for (int i = 1; i < data_dim.size(); ++i) + PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input("Updates")->dims()[i]); + ctx.Output("Out")->Resize(ctx.Input("Ref")->dims()); } }; @@ -35,13 +45,13 @@ class ScatterGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto Updates_grad = ctx.Output(framework::GradVarName("Updates")); - auto Updates = ctx.Input("Updates"); - auto Ref_grad = ctx.Output(framework::GradVarName("Ref")); - auto Ref = ctx.Input("Ref"); + auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); + auto *Updates = ctx.Input("Updates"); + auto *dRef = ctx.Output(framework::GradVarName("Ref")); + auto *Ref = ctx.Input("Ref"); - Ref_grad->Resize(Ref->dims()); - Updates_grad->Resize(Updates->dims()); + dRef->Resize(Ref->dims()); + dUpdates->Resize(Updates->dims()); } }; diff --git a/paddle/operators/scatter_op.h b/paddle/operators/scatter_op.h index c2db3ae37c..e9595638a8 100644 --- a/paddle/operators/scatter_op.h +++ b/paddle/operators/scatter_op.h @@ -46,13 +46,13 @@ class ScatterGradientOpKernel : public framework::OpKernel { auto *dRef = ctx.Output(framework::GradVarName("Ref")); auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); auto *Index = ctx.Input("Index"); - auto *dO = ctx.Input(framework::GradVarName("Out")); + auto *dOut = ctx.Input(framework::GradVarName("Out")); // In place gradient: dRef = dO - dRef->ShareDataWith(*dO); + dRef->ShareDataWith(*dOut); dUpdates->mutable_data(ctx.GetPlace()); // Gradient by Gather: dUpdates += dO[Index] - Gather(ctx.GetPlace(), dO, Index, dUpdates); + Gather(ctx.GetPlace(), 
dOut, Index, dUpdates); } }; diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index ac37671c77..abe0b5391a 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -82,6 +82,11 @@ def get_numeric_gradient(op, def product(dim): return reduce(lambda a, b: a * b, dim, 1) + def copy_tensor(): + for var_name in input_values: + tensor_ = local_scope.find_var(var_name).get_tensor() + tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + # get the input tensor that we want to get it's numeric gradient. tensor_to_check = local_scope.find_var(input_to_check).get_tensor() tensor_size = product(tensor_to_check.get_dims()) @@ -92,9 +97,7 @@ def get_numeric_gradient(op, # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): if in_place: - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + copy_tensor() # get one input element throw it's index i. origin = tensor_to_check.get_float_element(i) @@ -105,9 +108,7 @@ def get_numeric_gradient(op, # plus delta to this element, run op and get the sum of the result tensor. if in_place: - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) + copy_tensor() x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py index 861fe6cf89..c1f9444889 100644 --- a/python/paddle/v2/framework/tests/test_scatter_op.py +++ b/python/paddle/v2/framework/tests/test_scatter_op.py @@ -30,7 +30,6 @@ class TestScatterGradOp(GradientChecker): output_np = numpy.copy(ref_np) output_np[index_np] += updates_np inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} - # check gradient self.check_grad( op, inputs, set(["Updates", "Ref"]), "Out", in_place=True) From 6f235553fd923d4b0b225fdc4a521570b03fbc24 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 25 Aug 2017 22:20:20 +0000 Subject: [PATCH 118/170] scatter op fixed --- paddle/operators/scatter_op.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index f901edefa2..09a2f94dde 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #include "paddle/operators/scatter_op.h" #include "paddle/framework/ddim.h" From 852f341615808b6a5e6249b3b7c1f5f20fd22ec9 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 16:48:52 -0700 Subject: [PATCH 119/170] Add clean build section --- doc/howto/dev/build_cn.md | 10 +++++++++- doc/howto/dev/build_en.md | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index 79b4ff9d5a..d9d520893f 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -56,7 +56,15 @@ 有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以 ```bash - docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + +5. 清理 + + 有时候我们会希望清理掉已经下载的第三方依赖以及已经编译的二进制文件。此时只需要: + + ```bash + rm -rf build ``` ## 为什么要 Docker 呀? diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index e1b55929f9..318bf3d384 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -56,7 +56,15 @@ Nothing else. Not even Python and GCC, because you can install all build tools Sometimes we want to run a specific unit test, say `memory_test`, we can run ```bash - docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + +5. Clean Build. + + Sometimes, we might want to clean all thirt-party dependents and built binaries. To do so, just + + ```bash + rm -rf build ``` ## Docker, Or Not? From bfeecfd3d25c5c04dbd7d000490e77454944d044 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 25 Aug 2017 23:54:46 +0000 Subject: [PATCH 120/170] fix problems --- paddle/operators/scatter_op.cc | 18 +++++++++--------- paddle/operators/scatter_op.cu | 18 +++++++++--------- .../v2/framework/tests/gradient_checker.py | 6 +++--- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index 09a2f94dde..f901edefa2 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/scatter_op.h" #include "paddle/framework/ddim.h" diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu index e6a6fa57d9..6716b47883 100644 --- a/paddle/operators/scatter_op.cu +++ b/paddle/operators/scatter_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/scatter_op.h" diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index abe0b5391a..9a7a7fbf5e 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -82,7 +82,7 @@ def get_numeric_gradient(op, def product(dim): return reduce(lambda a, b: a * b, dim, 1) - def copy_tensor(): + def restore_inputs(): for var_name in input_values: tensor_ = local_scope.find_var(var_name).get_tensor() tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) @@ -97,7 +97,7 @@ def get_numeric_gradient(op, # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): if in_place: - copy_tensor() + restore_inputs() # get one input element throw it's index i. origin = tensor_to_check.get_float_element(i) @@ -108,7 +108,7 @@ def get_numeric_gradient(op, # plus delta to this element, run op and get the sum of the result tensor. if in_place: - copy_tensor() + restore_inputs() x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() From ec5e20c9f12e89e13b52978b8bb27997c77f059c Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 25 Aug 2017 17:14:28 -0700 Subject: [PATCH 121/170] Remove stopped containers and dangling images --- doc/howto/dev/build_cn.md | 18 +++++++++++------- doc/howto/dev/build_en.md | 4 ++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md index d9d520893f..0b911f7b75 100644 --- a/doc/howto/dev/build_cn.md +++ b/doc/howto/dev/build_cn.md @@ -7,7 +7,7 @@ 1. 一台电脑,可以装的是 Linux, BSD, Windows 或者 MacOS 操作系统,以及 1. 
Docker。 -不需要其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。 +不需要依赖其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。 ## 总体流程 @@ -17,7 +17,7 @@ git clone https://github.com/paddlepaddle/paddle ``` -2. 安装工具 +2. 安装开发工具到 Docker image 里 ```bash cd paddle; docker build -t paddle:dev . @@ -30,13 +30,13 @@ 以下命令启动一个 Docker container 来执行 `paddle:dev` 这个 Docker image,同时把当前目录(源码树根目录)映射为 container 里的 `/paddle` 目录,并且运行 `Dockerfile` 描述的默认入口程序 [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `cmake` 和 `make` 来编译 `/paddle` 里的源码,结果输出到 `/paddle/build`,也就是本地的源码树根目录里的 `build` 子目录。 ```bash - docker run -v $PWD:/paddle paddle:dev + docker run --rm -v $PWD:/paddle paddle:dev ``` 上述命令编译出一个 CUDA-enabled 版本。如果我们只需要编译一个只支持 CPU 的版本,可以用 ```bash - docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + docker run --rm -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev ``` 4. 运行单元测试 @@ -44,19 +44,19 @@ 用本机的第一个 GPU 来运行包括 GPU 单元测试在内的所有单元测试: ```bash - NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + NV_GPU=0 nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` 如果编译的时候我们用了 `WITH_GPU=OFF` 选项,那么编译过程只会产生 CPU-based 单元测试,那么我们也就不需要 nvidia-docker 来运行单元测试了。我们只需要: ```bash - docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" ``` 有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以 ```bash - nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" ``` 5. 清理 @@ -118,3 +118,7 @@ - 在 Windows/MacOS 上编译很慢 Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。 + +- 磁盘不够 + + 本文中的例子里,`docker run` 命令里都用了 `--rm` 参数,这样保证运行结束之后的 containers 不会保留在磁盘上。可以用 `docker ps -a` 命令看到停止后但是没有删除的 containers。`docker build` 命令有时候会产生一些中间结果,是没有名字的 images,也会占用磁盘。可以参考[这篇文章](https://zaiste.net/posts/removing_docker_containers/)来清理这些内容。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md index 318bf3d384..d0048e3714 100644 --- a/doc/howto/dev/build_en.md +++ b/doc/howto/dev/build_en.md @@ -118,3 +118,7 @@ Nothing else. Not even Python and GCC, because you can install all build tools - Docker on Windows/MacOS builds slowly On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details. + +- Not enough disk space + + Examples in this article uses option `--rm` with the `docker run` command. This option ensures that stopped containers do not exist on hard disks. We can use `docker ps -a` to list all containers, including stopped. Sometimes `docker build` generates some intermediate dangling images, which also take disk space. To clean them, please refer to [this article](https://zaiste.net/posts/removing_docker_containers/). 
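Both versions of the document above defer the actual cleanup commands to an external article; with the stock Docker CLI the sequence typically looks like the following (these commands are not part of the patch, and the removals are destructive, so review the listed items first):

```bash
# Containers, including stopped ones left behind by `docker run` without --rm.
docker ps -a
docker rm $(docker ps -aq --filter status=exited)

# Dangling (unnamed intermediate) images produced by repeated `docker build`.
docker images -f dangling=true
docker rmi $(docker images -f dangling=true -q)
```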
From 721b5020fae92600a0aa2c4093dbeeac63b597c6 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 26 Aug 2017 16:34:33 -0700 Subject: [PATCH 122/170] change predict size to label class_num --- python/paddle/v2/framework/tests/mnist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 9a0b109850..9b2dbed25c 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -181,7 +181,7 @@ images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) labels = data_layer(name='label', dims=[BATCH_SIZE]) fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid") fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid") -predict = fc_layer(net=forward_net, input=fc2, size=100, act="softmax") +predict = fc_layer(net=forward_net, input=fc2, size=10, act="softmax") cost = cross_entropy_layer(net=forward_net, input=predict, label=labels) init_net.complete_add_op(True) @@ -223,7 +223,7 @@ def test(cost_name): sum(error) / float(len(error)))) -PASS_NUM = 1 +PASS_NUM = 10 init_net.run(scope, dev_ctx) for pass_id in range(PASS_NUM): From 787cb8ce5d829435e5d45e8bc6bc51cfdcf49272 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 26 Aug 2017 16:36:28 -0700 Subject: [PATCH 123/170] reset pass num to 1 --- python/paddle/v2/framework/tests/mnist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py index 9b2dbed25c..a68f302f9c 100644 --- a/python/paddle/v2/framework/tests/mnist.py +++ b/python/paddle/v2/framework/tests/mnist.py @@ -223,7 +223,7 @@ def test(cost_name): sum(error) / float(len(error)))) -PASS_NUM = 10 +PASS_NUM = 1 init_net.run(scope, dev_ctx) for pass_id in range(PASS_NUM): From 3120ee5cfbbe6ecf3550b6a338a4c14afe6e4ebd Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sat, 26 Aug 2017 18:46:06 -0700 Subject: [PATCH 124/170] fix backward doc --- paddle/framework/backward.md | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 74c001b06a..c8fa3fefe5 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -21,18 +21,32 @@ grad_op_builder(fengjiayi) given a forward network, it generates the backward network. We only care about the Gradients—`OutputGradients`,`InputGradients`. -1. bla bla bla (yuyang) +1. Op + + when the input forward network is a Op, return its gradient Operator Immediately. 2. NetOp - when the input forward network is a NetOp, it need to call the sub NetOp/Operators backward function recursively and ensure them done. During the process, we need to collect the `OutputGradients` name. + when the input forward network is a NetOp, it need to call the sub NetOp/Operators backward function recursively. During the process, we need to collect the `OutputGradients` name according to forward NetOp. + + **shared variable**. As illustrated in the pictures, two operator's `Output` `Gradient` will overwirte their shared input variable. + +

+
+ + 1. shared variable in two operators. + +

+ + Sharing a variable between operators, or using the same input variable in multiple operators, leads to duplicate gradient variables. As the demo above shows, we need to rename the gradient names recursively and add a generic add operator to replace the overwrite links. + +

+
- We share variable in the same scope, as a result, duplicate operator `OutputGradients` will overwirte then duplicate variable. + 2. replace shared variable gradient with `Add` Operator - ![./images/duplicate_op]() +

- Share variable between operators or same input variable used in multiple operators lead to a duplicate gradient variable. As demo show above, we need to rename gradient name recursively, and add a generic add operator instead. -![./images/duplicate_op2]() -​ Then collect the sub graph OutputGradients/InputGradients as the NetOp's and return it. +​ Then collect the sub graph `OutputGradients`/`InputGradients` as the NetOp's and return it. From bb5c656b574b1e518da981d781db0e1e0a0e4d75 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 26 Aug 2017 19:15:31 -0700 Subject: [PATCH 125/170] test --- paddle/framework/backward.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index c717c2f30b..d5dbd57d19 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -6,7 +6,7 @@ In Neural Network, the backpropagation algorithm follows the chain rule, so we n ## Backward Operator Registry -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients, and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. For example, we have got a `add_two_op`, and is registered by the following code: From f646f7991ae49eff00370a03beb958fc88ac62ad Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Sun, 27 Aug 2017 12:01:46 +0800 Subject: [PATCH 126/170] Add chinese doc about how to write new operators. --- doc/howto/dev/new_op_cn.md | 300 +++++++++++++++++++++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100644 doc/howto/dev/new_op_cn.md diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md new file mode 100644 index 0000000000..df20c15ec6 --- /dev/null +++ b/doc/howto/dev/new_op_cn.md @@ -0,0 +1,300 @@ +# 如何写新的Operator + + - [概念简介](#概念简介) + - [实现C++类](#实现C++类) + - [定义ProtoMaker类](#定义ProtoMaker类) + - [定义Operator类](#定义Operator类) + - [定义`OpKernel`类](#定义`OpKernel`类) + - [注册类](#注册类) + - [编译](#编译) + - [绑定Python](#绑定Python) + - [实现单元测试](#实现单元测试) + + +## 概念简介 + +简单介绍需要用到基类,详细介绍请参考设计文档。 + +- `framework::OperatorBase`: Operator(简写,Op)基类。 +- `framework::OpKernel`: Op计算函数的基类,称作Kernel。 +- `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 +- `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 + +依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结如下: + +Forward Op需要包含: + + - OpProtoMake定义 + - Op定义 + - Kernel实现 + +与之对应的Backward Op包含: + + - Op定义 + - Kernel实现 + +下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 + + +## 实现C++类 + + +### 1. 
定义ProtoMaker类 + +矩阵乘的公式:$$Out = X * Y$$ ,可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: + + + + ``` + class MulOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of mul op"); + AddInput("Y", "The second input of mul op"); + AddOutput("Out", "The output of mul op"); + AddComment(R"DOC( + Two Element Mul Operator. + The equation is: Out = X * Y + )DOC"); + } + }; + ``` + +[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个: + + - `framework::OpProto` : 前者存储Op的输入输出和参数属性,将用于Python API接口的生成。 + - `framework::OpAttrChecker` :后者用于检查参数属性的合法性。 + +构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加该Op的注释,这些函数会将对应内容添加到`OpProto`中。 + +在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,该命名尽可能的规范。 + + +再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子: + +```C++ + template +class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of scale operator.").NotInGradient(); + AddOutput("Out", "The output tensor of scale operator.").NotInGradient(); + AddComment(R"DOC(Scale operator +The equation is: Out = scale*X +)DOC"); + AddAttr("scale", "scale of scale operator.").SetDefault(1.0); + } +}; +``` + + 在这个例子里,两处不同: + + - `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中。 + - `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 + + +### 2. 定义Operator类 + + + ```C++ + class MulOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(dim0.size(), 2, + "input X(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("X")); + PADDLE_ENFORCE_EQ(dim1.size(), 2, + "input Y(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("Y")); + PADDLE_ENFORCE_EQ( + dim0[1], dim1[0], + "First matrix's width must be equal with second matrix's height."); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); + } + }; + ``` + +[`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22)继承自`OperatorWithKernel`。`public`成员: + +```C++ +using framework::OperatorWithKernel::OperatorWithKernel; +``` + +这句表示使用基类`OperatorWithKernel`的构造函数,也可写成: + +```C++ + MulOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} +``` + +还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: + - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法 + - 2). 设置输出Tensor的形状 + +通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 + +### 3. 
定义`OpKernel`类 + +```C++ +template +class MulKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* Y = context.Input("Y"); + auto* Z = context.Output("Out"); + Z->mutable_data(context.GetPlace()); + auto* device_context = + const_cast(context.device_context_); + math::matmul(*X, false, *Y, false, 1, Z, 0, device_context); + } +}; +``` + +`MulKernel`继承自`framework::OpKernel`,带有模板参数: + + - `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + + - `typename T` : 表示数据类型,如`float`, `double`等。 + +`MulKernel`需要重写`Compute`接口,该接口参数为`const framework::ExecutionContext& context`, `ExecutionContext`相比`InferShapeContext`增加了设备类型,同样可获取到输入输出和属性参数,`Compute`函数里写具体实现时。 + +注意,不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + +到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。 + +### 4. 注册类 + +在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 + + ```C++ + namespace ops = paddle::operators; + REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); + REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_CPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + + - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`, + - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 + - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。 + +在 `.cu`文件中注册GPU Kernel。 + + ``` + namespace ops = paddle::operators; + REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_GPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + +### 5. 
编译 + +在[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件中添加编译。 + + ``` + op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) + ``` + +下面命令可以编译: + + ``` + make mul_op + ``` + +## 绑定Python + + - 绑定Python + + 在 [`paddle/pybind/pybind.cc +`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: + + ``` + USE_OP(mul); + ``` + 如果只实现了CPU版本,则使用`USE_CPU_ONLY_OP`: + + ``` + USE_CPU_ONLY_OP(gather); + ``` + + 使用`USE_OP`告知编译器需要链接该Op的目标文件,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 + + + - 生成库 + + 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中。 + + ``` + if(WITH_PYTHON) +cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python backward + mul_op + minus_op) +endif(WITH_PYTHON) + ``` + +## 实现单元测试 + +单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 + +- 前向Op单测 + +前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 + +``` +import unittest +import numpy as np +from gradient_checker import GradientChecker, create_op +from op_test_util import OpTestMeta + +class TestMulOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "mul" + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} +``` + 首先需要`import`必要的包,下面详细解释其他值: + + - `self.type = "mul" ` : 定义类型,和注册的类型一致。 + - `self.inputs` : 定义输入,类型为Numpy.array,并初始化。 + - `self.outputs` : 定义输出,并得到Python结算结果。 + + + - 反向Op单测 + +反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 + + ``` + class MulGradOpTest(GradientChecker): + def test_mul(self): + op = create_op("mul") + inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.compare_grad(op, inputs) + # mul op will enlarge the relative error + self.check_grad( + op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) + ``` + + - 调用`create_op("mul")`创建反向Op对应的前向Op。 + - 定义输入`inputs`。 + - 调用`compare_grad`函数对比CPU、GPU计算结果。 + - 调用`check_grad`检查梯度稳定性。 From d78521d491d8c6625146137406f3b7402aebe143 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Sun, 27 Aug 2017 12:11:15 +0800 Subject: [PATCH 127/170] fix doc format. --- doc/howto/dev/new_op_cn.md | 160 ++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index df20c15ec6..ebd2cf3ff0 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -4,11 +4,13 @@ - [实现C++类](#实现C++类) - [定义ProtoMaker类](#定义ProtoMaker类) - [定义Operator类](#定义Operator类) - - [定义`OpKernel`类](#定义`OpKernel`类) + - [定义OpKernel类](#定义OpKernel类) - [注册类](#注册类) - [编译](#编译) - [绑定Python](#绑定Python) - [实现单元测试](#实现单元测试) + - [前向Operator单测](#前向Operator单测) + - [反向Operator单测](#反向Operator单测) ## 概念简介 @@ -41,25 +43,23 @@ Forward Op需要包含: ### 1. 
定义ProtoMaker类 -矩阵乘的公式:$$Out = X * Y$$ ,可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: - +矩阵乘的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: - - ``` - class MulOpMaker : public framework::OpProtoAndCheckerMaker { - public: - MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of mul op"); - AddInput("Y", "The second input of mul op"); - AddOutput("Out", "The output of mul op"); - AddComment(R"DOC( - Two Element Mul Operator. - The equation is: Out = X * Y - )DOC"); - } - }; - ``` +``` +class MulOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of mul op"); + AddInput("Y", "The second input of mul op"); + AddOutput("Out", "The output of mul op"); + AddComment(R"DOC( +Two Element Mul Operator. +The equation is: Out = X * Y +)DOC"); + } +}; +``` [`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个: @@ -73,8 +73,8 @@ Forward Op需要包含: 再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子: -```C++ - template +``` +template class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { public: ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) @@ -98,42 +98,42 @@ The equation is: Out = scale*X ### 2. 定义Operator类 - ```C++ - class MulOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto dim0 = ctx.Input("X")->dims(); - auto dim1 = ctx.Input("Y")->dims(); - PADDLE_ENFORCE_EQ(dim0.size(), 2, - "input X(%s) should be a tensor with 2 dims, a matrix", - ctx.op_.Input("X")); - PADDLE_ENFORCE_EQ(dim1.size(), 2, - "input Y(%s) should be a tensor with 2 dims, a matrix", - ctx.op_.Input("Y")); - PADDLE_ENFORCE_EQ( - dim0[1], dim1[0], - "First matrix's width must be equal with second matrix's height."); - ctx.Output("Out")->Resize({dim0[0], dim1[1]}); - } - }; - ``` +```c++ +class MulOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(dim0.size(), 2, + "input X(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("X")); + PADDLE_ENFORCE_EQ(dim1.size(), 2, + "input Y(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("Y")); + PADDLE_ENFORCE_EQ( + dim0[1], dim1[0], + "First matrix's width must be equal with second matrix's height."); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); + } +}; +``` [`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22)继承自`OperatorWithKernel`。`public`成员: -```C++ +```c++ using framework::OperatorWithKernel::OperatorWithKernel; ``` 这句表示使用基类`OperatorWithKernel`的构造函数,也可写成: -```C++ - MulOp(const std::string &type, const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} +```c++ +MulOp(const std::string &type, const framework::VariableNameMap 
&inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} ``` 还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: @@ -142,7 +142,7 @@ using framework::OperatorWithKernel::OperatorWithKernel; 通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 -### 3. 定义`OpKernel`类 +### 3. 定义OpKernel类 ```C++ template @@ -176,13 +176,13 @@ class MulKernel : public framework::OpKernel { 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 - ```C++ - namespace ops = paddle::operators; - REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); - REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); - REGISTER_OP_CPU_KERNEL(mul_grad, - ops::MulGradKernel); - ``` +```c++ +namespace ops = paddle::operators; +REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); +REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); +REGISTER_OP_CPU_KERNEL(mul_grad, + ops::MulGradKernel); +``` - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`, - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 @@ -190,32 +190,32 @@ class MulKernel : public framework::OpKernel { 在 `.cu`文件中注册GPU Kernel。 - ``` - namespace ops = paddle::operators; - REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); - REGISTER_OP_GPU_KERNEL(mul_grad, - ops::MulGradKernel); - ``` +```c++ +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); +REGISTER_OP_GPU_KERNEL(mul_grad, + ops::MulGradKernel); +``` ### 5. 编译 在[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件中添加编译。 - ``` - op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) - ``` +``` +op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) +``` 下面命令可以编译: - ``` - make mul_op - ``` +``` +make mul_op +``` ## 绑定Python - - 绑定Python +- 绑定Python - 在 [`paddle/pybind/pybind.cc + 在 [`paddle/pybind/pybind.cc `](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: ``` @@ -232,23 +232,23 @@ class MulKernel : public framework::OpKernel { - 生成库 - 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中。 + 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中,使得该Op可以链接到生成的lib库中。 ``` if(WITH_PYTHON) -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - mul_op - minus_op) -endif(WITH_PYTHON) + cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python backward + mul_op + minus_op) + endif(WITH_PYTHON) ``` ## 实现单元测试 单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 -- 前向Op单测 +### 前向Operator单测 前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 @@ -276,7 +276,7 @@ class TestMulOp(unittest.TestCase): - `self.outputs` : 定义输出,并得到Python结算结果。 - - 反向Op单测 +### 反向Operator单测 反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 From 4a83dde594d0aa6d19aeff7471b040277a8a839f Mon Sep 17 00:00:00 2001 From: caoying03 Date: Sun, 27 Aug 2017 11:28:05 +0800 Subject: [PATCH 128/170] save parameters into ordered dict. 
--- python/paddle/v2/parameters.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index b8af5abaea..475067ef22 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -14,6 +14,7 @@ import numpy as np from paddle.proto.ParameterConfig_pb2 import ParameterConfig +from collections import OrderedDict import paddle.trainer.config_parser as cp import struct import tarfile @@ -62,7 +63,7 @@ class Parameters(object): """ def __init__(self): - self.__param_conf__ = dict() + self.__param_conf__ = OrderedDict() self.__gradient_machines__ = [] self.__tmp_params__ = dict() @@ -231,6 +232,9 @@ class Parameters(object): :rtype: np.ndarray """ import py_paddle.swig_paddle as api + if self.__param_conf__[key].is_static: + return np.zeros(self.__param_conf__[key].size, dtype=np.float32) + return self.__getter_inner(key, api.PARAMETER_GRADIENT) def set(self, parameter_name, value): From 4590f793f111dd4fc5134ca9bbd0a213b41962b7 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sun, 27 Aug 2017 17:37:41 -0700 Subject: [PATCH 129/170] Update backward document --- paddle/framework/backward.md | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index b4205fed2e..133b17c7be 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -2,32 +2,24 @@ ## Motivation -In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/ expression's Backward feature will generate the backward pass respect to forward pass. - +In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/expression's backward pass will be generated respect to forward pass. + ## Backward Operator Registry -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences, which is quite similar with operator registry itself. +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences. For example, we have got a `add_two_op`, and is registered by the following code: ```cpp -REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker); +REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker, add_two_grad, AddTwoGradOp); ``` `add_two` is the operator's type. `AddTwoOp` and `AddTwoOpMaker` are the operator class and the operator maker class respectively. -Assume that we have also got the backward operator of `add_two_op`, which calculating the gradients of `add_two_op`'s inputs. 
Then we register it by the following way: - -```cpp -REGISTER_GRADIENT_OP(add_two, add_two_grad, AddTwoGradOp); -``` - `add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. ## Backward Opeartor Creating -### Usage - Given a certain forward operator, we can get its corresponding backward opeartor by calling: ```cpp @@ -36,13 +28,13 @@ OperatorBase* bwd_op = BuildGradOp(const OperatorBase* fwd_op); The function `BuildGradOp` will sequentially execute following processes: -1. Getting the `type_` of given forward operator, and then creating the corresponding backward operator. +1. Get the `type_` of given forward operator, and then get the corresponding backward operator's type by looking up the `OpInfoMap`. -2. Copying all the attributes of forward operator expect `input_format` and `output_format`(if it has), for their elements differ between forward and backward operators. +2. Build two maps named `inputs` and `outputs` to temporary storage backward operator's inputs and outputs. Copy forward operator's `inputs_` and `outputs_` to map `inputs`, except these are not necessary for gradient computing. -3. Copying forward operator's `inputs_` and `outputs_` to backward operator's `inputs_`. And adding forward inputs' gradient variables into backward `output_`, adding forward outputs' gradient variables into backward `input_`. +3. Add forward inputs' gradient variables into map `output`, adding forward outputs' gradient variables into map `input`. -4. Building backward operator's `input_format`, `output_format` (if necessary) and `in_out_idxs_` according to its `inputs_` and `outputs_` just created. +4. Building backward operator with `inputs`, `outputs` and forward operator's attributes. ## Backward Network Building From 98b7c6736445de1f287156e933b0d625f648e6da Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 28 Aug 2017 09:52:58 +0800 Subject: [PATCH 130/170] add todo --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index ad50c15a7d..d38e6a2099 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -184,15 +184,14 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + // TODO(TJ): merge topdiffs if (nextIsMKLDNN()) { // can not directly cast outputgrad to mkldnnmatrix, // since each layer can not write the inputgrad to mkldnn inputgrad. // So just create from matrix with outputvalue format. 
const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); - // TODO: maybe need merge topdiffs } else { - // TODO: merge topdiffs const MatrixPtr& out = getOutput(CPU_DEVICE).grad; // fc do not need to convert from cpu device since output always nc // only need create from cpu device @@ -234,8 +233,7 @@ void MKLDNNFcLayer::resetBwd() { return; } if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO: many mkldnn bots - // add sum handle + // TODO(TJ): use outputMaps_ ways when merge topdiff done } else { inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); } @@ -245,8 +243,7 @@ void MKLDNNFcLayer::resetBwd() { return; } if (getInput(0, CPU_DEVICE).getAllCount() > 1) { - // TODO: many bots - // add sum handle + // TODO(TJ): use outputMaps_ ways when merge topdiff done } else { inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); } From be4c0123c4c6cccfaa8fafa9063ce84415854c28 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 28 Aug 2017 10:11:54 +0800 Subject: [PATCH 131/170] follow comments. --- python/paddle/v2/parameters.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index 475067ef22..cc3adf6f48 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -43,9 +43,26 @@ def create(layers): class Parameters(object): """ - Parameters is a dictionary contains Paddle's parameter. The key of - Parameters is the name of parameter. The value of Parameters is a plain - :code:`numpy.ndarry` . + `Parameters` manages all the learnable parameters in a neural network. + It stores parameters' information in an OrderedDict, key of which is + the name of a parameter, and value related to a key is a parameter's + configuration, such as initialization mean and std, its size, whether it is + a static parameter, and so on. + + :param __param_conf__: this member stores the configurations of learnable + parameters in a network in an OrderedDict. The parameters are added by + following their creation order in the neural network one by one: + parameters of the previous layers in a network are careted first. + When a user iterates over this dict, he can visit parameters in the + network from button to up. + :type __param_conf__: OrderedDict + :param __gradient_machines__: all of the parameters in a neural network are + appended to a Paddle gradient machine, which is used internally to copy + the parameter values between the C++ and Python end. + :type __gradient_machines__: list + :param __tmp_params__: a dict to store dummy parameters if no + __gradient_machines__ is appended to `Parameters`. 
+ :type __tmp_params__: dict Basically usage is From 346630f413a2e9aa9cbbdf2af4595a461ec09ac0 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 28 Aug 2017 11:19:53 +0800 Subject: [PATCH 132/170] Remove "About" tab in "Documentation" --- doc/about/index_cn.md | 11 ----------- doc/about/index_en.rst | 14 -------------- doc/index_en.rst | 1 - 3 files changed, 26 deletions(-) delete mode 100644 doc/about/index_cn.md delete mode 100644 doc/about/index_en.rst diff --git a/doc/about/index_cn.md b/doc/about/index_cn.md deleted file mode 100644 index 3bf030004d..0000000000 --- a/doc/about/index_cn.md +++ /dev/null @@ -1,11 +0,0 @@ -关于PaddlePaddle -================ - -PaddlePaddle是一个最早由百度科学家和工程师共同研发的并行分布式深度学习平台,兼备易用性、高效性、灵活性和可扩展性,目前已被百度内部多个产品线广泛使用。 -PaddlePaddle目前已经开放源码, 但是远未完善,我们希望能在这个基础上不断的改进、扩展和延伸。 -同时我们希望广大开发者积极提供反馈和贡献源代码,建立一个活跃的开源社区。 - -致谢 --------- - -在此,特别感谢PaddlePaddle的[所有贡献者](https://github.com/PaddlePaddle/Paddle/graphs/contributors)。 diff --git a/doc/about/index_en.rst b/doc/about/index_en.rst deleted file mode 100644 index 065c430cde..0000000000 --- a/doc/about/index_en.rst +++ /dev/null @@ -1,14 +0,0 @@ -ABOUT -======= - -PaddlPaddle is an easy-to-use, efficient, flexible and scalable deep learning platform, -which is originally developed by Baidu scientists and engineers for the purpose of applying deep learning to many products at Baidu. - -PaddlePaddle is now open source but far from complete, which is intended to be built upon, improved, scaled, and extended. -We hope to build an active open source community both by providing feedback and by actively contributing to the source code. - - -Credits --------- - -We owe many thanks to `all contributors and developers `_ of PaddlePaddle! diff --git a/doc/index_en.rst b/doc/index_en.rst index 168c7667c6..64684b8b9b 100644 --- a/doc/index_en.rst +++ b/doc/index_en.rst @@ -7,4 +7,3 @@ PaddlePaddle Documentation getstarted/index_en.rst howto/index_en.rst api/index_en.rst - about/index_en.rst From fe51f726a2da85b0cb96734bd9b156760b044cf9 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 28 Aug 2017 10:44:31 +0800 Subject: [PATCH 133/170] fix cmake --- paddle/math/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 8afe6b509d..68b5296228 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -16,10 +16,10 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) if(NOT WITH_MKLDNN) - file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") - file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) - list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) + set(DNN_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.h") + set(DNN_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.cpp") + list(REMOVE_ITEM MATH_HEADERS "${DNN_HEADER}") + list(REMOVE_ITEM MATH_SOURCES "${DNN_SOURCE}") message(STATUS "Skip compiling with MKLDNNMatrix") else() message(STATUS "Compile with MKLDNNMatrix") From f241773c4f1803631bba968bca1d5621a0d3ced5 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 23 Aug 2017 19:43:57 +0800 Subject: [PATCH 134/170] Support to use clang for Android cross-compiling. 
--- Dockerfile.android | 4 +- cmake/cblas.cmake | 4 + cmake/external/warpctc.cmake | 1 + paddle/cuda/include/hl_cpu_gru.cuh | 166 ++++++++++++------------- paddle/function/MulOp.cpp | 37 +++--- paddle/math/MathFunctions.cpp | 4 + paddle/math/MathFunctions.h | 23 +++- paddle/math/Matrix.cpp | 18 ++- paddle/scripts/docker/build_android.sh | 51 ++++++-- 9 files changed, 181 insertions(+), 127 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index aa95abb366..6013215d9d 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -47,8 +47,8 @@ RUN mkdir /opt/android-ndk-tmp && \ wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ - ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_ARM_STANDALONE_TOOLCHAIN} && \ - ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-21 --install-dir=${ANDROID_ARM64_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-23 --install-dir=${ANDROID_ARM_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-23 --install-dir=${ANDROID_ARM64_STANDALONE_TOOLCHAIN} && \ rm -rf /opt/android-ndk-tmp && \ rm -rf ${ANDROID_NDK_HOME} diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 854066fd1d..ab111eccc0 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -13,6 +13,10 @@ # system paths. # +if(USE_EIGEN_FOR_BLAS) + return() +endif(USE_EIGEN_FOR_BLAS) + set(CBLAS_FOUND OFF) ## Find MKLML First. diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 2d7daed9bc..3cc652bed5 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -41,6 +41,7 @@ IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "App ELSE() SET(USE_OMP ON) ENDIF() +SET(USE_OMP OFF FORCE) ExternalProject_Add( extern_warpctc diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh index c0a37ced2a..732799a28b 100644 --- a/paddle/cuda/include/hl_cpu_gru.cuh +++ b/paddle/cuda/include/hl_cpu_gru.cuh @@ -20,11 +20,11 @@ limitations under the License. 
*/ #include "paddle/math/MathFunctions.h" -#ifndef PADDLE_TYPE_DOUBLE -#define CBLAS_GEMM paddle::gemm -#else -#define CBLAS_GEMM paddle::gemm -#endif +// #ifndef PADDLE_TYPE_DOUBLE +// #define CBLAS_GEMM paddle::gemm +// #else +// #define CBLAS_GEMM paddle::gemm +// #endif template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, @@ -219,37 +219,37 @@ void hl_cpu_gru_forward(OpResetOutput opResetOutput, hl_activation_mode_t active_node, hl_activation_mode_t active_gate) { if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - 2 * frameSize, - frameSize, - 1, - value.prevOutValue, - frameSize, - value.gateWeight, - frameSize * 2, - 1, - value.gateValue, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// 2 * frameSize, +// frameSize, +// 1, +// value.prevOutValue, +// frameSize, +// value.gateWeight, +// frameSize * 2, +// 1, +// value.gateValue, +// frameSize * 3); } forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - frameSize, - frameSize, - 1, - value.resetOutputValue, - frameSize, - value.stateWeight, - frameSize, - 1, - value.gateValue + frameSize * 2, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// value.resetOutputValue, +// frameSize, +// value.stateWeight, +// frameSize, +// 1, +// value.gateValue + frameSize * 2, +// frameSize * 3); } forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); @@ -538,34 +538,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_node); if (value.prevOutValue && grad.prevOutGrad) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - frameSize, - 1, - grad.gateGrad + frameSize * 2, - frameSize * 3, - value.stateWeight, - frameSize, - 0, - grad.resetOutputGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// value.stateWeight, +// frameSize, +// 0, +// grad.resetOutputGrad, +// frameSize); if (grad.stateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize, - batchSize, - 1, - value.resetOutputValue, - frameSize, - grad.gateGrad + frameSize * 2, - frameSize * 3, - 1, - grad.stateWeightGrad, - frameSize); +// CBLAS_GEMM(CblasTrans, +// CblasNoTrans, +// frameSize, +// frameSize, +// batchSize, +// 1, +// value.resetOutputValue, +// frameSize, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// 1, +// grad.stateWeightGrad, +// frameSize); } } @@ -573,34 +573,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_gate); if (grad.prevOutGrad && value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - frameSize * 2, - 1, - grad.gateGrad, - frameSize * 3, - value.gateWeight, - frameSize * 2, - 1, - grad.prevOutGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize * 2, +// 1, +// grad.gateGrad, +// frameSize * 3, +// value.gateWeight, +// frameSize * 2, +// 1, +// grad.prevOutGrad, +// frameSize); if (grad.gateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize * 2, - batchSize, - 1, - value.prevOutValue, - frameSize, - grad.gateGrad, - frameSize * 3, - 1, - grad.gateWeightGrad, - frameSize * 2); +// CBLAS_GEMM(CblasTrans, +// 
CblasNoTrans, +// frameSize, +// frameSize * 2, +// batchSize, +// 1, +// value.prevOutValue, +// frameSize, +// grad.gateGrad, +// frameSize * 3, +// 1, +// grad.gateWeightGrad, +// frameSize * 2); } } } diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp index 91b4b8ed91..25e41edad5 100644 --- a/paddle/function/MulOp.cpp +++ b/paddle/function/MulOp.cpp @@ -13,18 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MulOp.h" -/// todo(tianbing), delete it -#include -#include "paddle/math/MathFunctions.h" +#include "GemmFunctor.h" #include "paddle/math/SIMDFunctions.h" #include "paddle/utils/ThreadLocal.h" -#ifndef PADDLE_TYPE_DOUBLE -#define GEMM paddle::gemm -#else -#define GEMM paddle::gemm -#endif - namespace { inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { for (unsigned int i = 0; i < len; ++i) { @@ -114,19 +106,20 @@ void MulOp(CpuMatrix& out, real scaleT, bool aTrans, bool bTrans) { - GEMM(aTrans ? CblasTrans : CblasNoTrans, - bTrans ? CblasTrans : CblasNoTrans, - out.getHeight(), - out.getWidth(), - !aTrans ? a.getWidth() : a.getHeight(), - scaleAB, - a.getData(), - a.getStride(), - b.getData(), - b.getStride(), - scaleT, - out.getData(), - out.getStride()); + BlasGemm::compute( + aTrans, + bTrans, + out.getHeight(), + out.getWidth(), + !aTrans ? a.getWidth() : a.getHeight(), + scaleAB, + a.getData(), + a.getStride(), + b.getData(), + b.getStride(), + scaleT, + out.getData(), + out.getStride()); } /// dense matrix (+)= sparse matrix * dense matrix diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index c8ba1074a1..c2f17beeb8 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -84,6 +84,7 @@ LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP) namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -143,6 +144,7 @@ void gemm(const CBLAS_TRANSPOSE transA, C, ldc); } +#endif template <> int getrf(const CBLAS_ORDER order, @@ -182,6 +184,7 @@ int getri(const CBLAS_ORDER order, return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); } +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void axpy(const int n, const float alpha, const float* x, float* y) { cblas_saxpy(n, alpha, x, 1, y, 1); @@ -201,6 +204,7 @@ template <> double dotProduct(const int n, const double* x, const double* y) { return cblas_ddot(n, x, 1, y, 1); } +#endif #if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML) diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 637643838f..9297ae78c2 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -40,7 +40,14 @@ extern "C" { #ifndef LAPACK_FOUND extern "C" { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS #include +#else +typedef enum CBLAS_ORDER { + CblasRowMajor = 101, + CblasColMajor = 102 +} CBLAS_ORDER; +#endif int LAPACKE_sgetrf( int matrix_layout, int m, int n, float* a, int lda, int* ipiv); int LAPACKE_dgetrf( @@ -56,6 +63,7 @@ int LAPACKE_dgetri( namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -70,6 +78,7 @@ void gemm(const CBLAS_TRANSPOSE transA, const T beta, T* C, const int ldc); +#endif template int getrf(const CBLAS_ORDER Order, @@ -84,10 +93,20 @@ int getri( const CBLAS_ORDER Order, const int N, T* A, const int lda, const int* ipiv); template -void axpy(const int n, const T alpha, const T* x, T* y); +void 
axpy(const int n, const T alpha, const T* x, T* y) { + /// y = y + alpha * x + for (int i = 0; i < n; i++) { + y[i] = y[i] + alpha * x[i]; + } +} template -T dotProduct(const int n, const T* x, const T* y); +T dotProduct(const int n, const T* x, const T* y) { + T result = static_cast(0); + for (int i = 0; i < n; i++) { + result += x[i] * y[i]; + } +} template void vExp(const int n, const T* a, T* r); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..fbf3accc9a 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -28,6 +28,7 @@ limitations under the License. */ #include "hl_top_k.h" #include "paddle/utils/Logging.h" +#include "paddle/function/GemmFunctor.h" #include "paddle/utils/ThreadLocal.h" #include "SIMDFunctions.h" @@ -2222,24 +2223,29 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { CHECK(!isTransposed()) << "Not supported"; size_t a_col, b_col, a_row, b_row; - CBLAS_TRANSPOSE a_trans, b_trans; + // CBLAS_TRANSPOSE a_trans, b_trans; + bool a_trans, b_trans; if (!a->isTransposed()) { a_col = a->getWidth(); a_row = a->getHeight(); - a_trans = CblasNoTrans; + // a_trans = CblasNoTrans; + a_trans = false; } else { a_col = a->getHeight(); a_row = a->getWidth(); - a_trans = CblasTrans; + // a_trans = CblasTrans; + a_trans = true; } if (!b->isTransposed()) { b_col = b->getWidth(); b_row = b->getHeight(); - b_trans = CblasNoTrans; + // b_trans = CblasNoTrans; + b_trans = false; } else { b_col = b->getHeight(); b_row = b->getWidth(); - b_trans = CblasTrans; + // b_trans = CblasTrans; + b_trans = true; } CHECK_EQ(a_col, b_row); @@ -2256,7 +2262,7 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { int lda = a->getStride(); int ldb = b->getStride(); int ldc = getStride(); - gemm( + BlasGemm::compute( a_trans, b_trans, M, N, K, scaleAB, A, lda, B, ldb, scaleT, C, ldc); } diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 593ae28e49..a61c7c40e9 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,11 +2,31 @@ set -xe -mkdir -p /paddle/build_android/$ANDROID_ABI -cd /paddle/build_android/$ANDROID_ABI -rm -rf /paddle/install 2>/dev/null || true +COMPILER=gcc +USE_EIGEN=ON +if [ $COMPILER == clang ]; then + SUFFIX=_clang + C_COMPILER=clang + CXX_COMPILER=clang++ +else + SUFFIX=_gcc + C_COMPILER=gcc + CXX_COMPILER=g++ +fi +if [ $USE_EIGEN == ON ]; then + SUFFIX=${SUFFIX}_eigen +else + SUFFIX=${SUFFIX}_openblas +fi -THIRD_PARTY_PATH=/paddle/third_party_android/$ANDROID_ABI +BUILD_ROOT=/paddle/build_android$SUFFIX +DEST_ROOT=/paddle/install$SUFFIX + +rm -rf $BUILD_ROOT 2>/dev/null || true +mkdir -p $BUILD_ROOT +cd $BUILD_ROOT + +THIRD_PARTY_PATH=/paddle/third_party_android$SUFFIX/$ANDROID_ABI if [ $ANDROID_ABI == "armeabi-v7a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ @@ -14,27 +34,34 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_NEON=ON \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-${C_COMPILER} \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-${CXX_COMPILER} \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ - -DCMAKE_INSTALL_PREFIX=/paddle/install \ + -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ + -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle -elif 
[ $ANDROID_ABI == "arm64-v7a" ]; then + -DWITH_STYLE_CHECK=OFF \ + .. +elif [ $ANDROID_ABI == "arm64-v8a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM64_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-${C_COMPILER} \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-${CXX_COMPILER} \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ - -DCMAKE_INSTALL_PREFIX=/paddle/install \ + -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ + -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. elif [ $ANDROID_ABI == "armeabi" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \ @@ -47,10 +74,10 @@ elif [ $ANDROID_ABI == "armeabi" ]; then -DCMAKE_BUILD_TYPE=Release \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. else echo "Invalid ANDROID_ABI: $ANDROID_ABI" fi -make -j `nproc` -make install -j `nproc` +make VERBOSE=1 -j2 +make install -j2 From f0b25c4cfb21b41e8bc7222d44f05a9818dc9b47 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 28 Aug 2017 12:20:28 +0800 Subject: [PATCH 135/170] follow comments to refine the comments. --- python/paddle/v2/parameters.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index cc3adf6f48..4cfd91882e 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -44,21 +44,20 @@ def create(layers): class Parameters(object): """ `Parameters` manages all the learnable parameters in a neural network. - It stores parameters' information in an OrderedDict, key of which is - the name of a parameter, and value related to a key is a parameter's - configuration, such as initialization mean and std, its size, whether it is - a static parameter, and so on. - - :param __param_conf__: this member stores the configurations of learnable - parameters in a network in an OrderedDict. The parameters are added by - following their creation order in the neural network one by one: - parameters of the previous layers in a network are careted first. - When a user iterates over this dict, he can visit parameters in the - network from button to up. + It stores parameters' information in an OrderedDict. The key is + the name of a parameter, and value is a parameter's configuration(in + protobuf format), such as initialization mean and std, its size, whether it + is a static parameter, and so on. + + :param __param_conf__: store the configurations of learnable parameters in + the network in an OrderedDict. Parameter is added one by one into the + dict by following their created order in the network: parameters of + the previous layers in a network are careted first. You can visit the + parameters from bottom to top by iterating over this dict. :type __param_conf__: OrderedDict :param __gradient_machines__: all of the parameters in a neural network are - appended to a Paddle gradient machine, which is used internally to copy - the parameter values between the C++ and Python end. + appended to a PaddlePaddle gradient machine, which is used internally to + copy parameter values between C++ and Python end. 
:type __gradient_machines__: list :param __tmp_params__: a dict to store dummy parameters if no __gradient_machines__ is appended to `Parameters`. @@ -271,7 +270,7 @@ class Parameters(object): append gradient machine to parameters. This method is used internally in Trainer.train. - :param gradient_machine: Paddle C++ GradientMachine object. + :param gradient_machine: PaddlePaddle C++ GradientMachine object. :type gradient_machine: api.GradientMachine :return: """ From 227fdfb65dcb45921398690610886ebdb9b34d98 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 13:35:51 +0800 Subject: [PATCH 136/170] Refine NeonDepthwiseConvFunction. --- paddle/function/neon/NeonDepthwiseConv.cpp | 70 ++++++++-------------- 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 3fe28b1de3..f09e98587d 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -509,10 +509,9 @@ public: size_t filterMultiplier = outputChannels / groups_; CHECK_EQ(inputChannels, groups_); - // only support + // only support strideH() == strideW() and filterHeight == filterWidth. CHECK_EQ(strideH(), strideW()); CHECK_EQ(filterHeight, filterWidth); - CHECK_LT(strideH(), size_t(3)); float* inputData = inputs[0].data(); float* filterData = inputs[1].data(); @@ -538,49 +537,32 @@ public: inputWidth += 2 * paddingW(); } - for (size_t i = 0; i < batchSize; i++) { - if (filterWidth == 3 && strideH() == 1) { - DepthwiseConvKernel<3, 1>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } else if (filterWidth == 3 && strideH() == 2) { - DepthwiseConvKernel<3, 2>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } else if (filterWidth == 4 && strideH() == 1) { - DepthwiseConvKernel<4, 1>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } else if (filterWidth == 4 && strideH() == 2) { - DepthwiseConvKernel<4, 2>::run(inputPadding, - filterData, - inputHeight, - inputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - } + std::function + DepthWiseConv; + + if (filterWidth == 3 && strideW() == 1) { + DepthWiseConv = DepthwiseConvKernel<3, 1>::run; + } else if (filterWidth == 3 && strideW() == 2) { + DepthWiseConv = DepthwiseConvKernel<3, 2>::run; + } else if (filterWidth == 4 && strideW() == 1) { + DepthWiseConv = DepthwiseConvKernel<4, 1>::run; + } else if (filterWidth == 4 && strideW() == 2) { + DepthWiseConv = DepthwiseConvKernel<4, 2>::run; + } else { + LOG(FATAL) << "Not supported"; + } + for (size_t i = 0; i < batchSize; i++) { + DepthWiseConv(inputPadding, + filterData, + inputHeight, + inputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); inputPadding += inputChannels * inputHeight * inputWidth; outputData += outputChannels * outputHeight * outputWidth; } From 3a75b4b70cd21449691eaca82f1805759622e640 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 14:49:11 +0800 Subject: [PATCH 137/170] Fix CMakeLists.text --- paddle/function/CMakeLists.txt | 2 +- paddle/function/DepthwiseConvOpTest.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/function/CMakeLists.txt 
b/paddle/function/CMakeLists.txt index 05f808a6a1..f43f15e5ca 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -44,11 +44,11 @@ if(WITH_GPU) add_simple_unittest(RowConvOpTest) add_simple_unittest(BlockExpandOpTest) add_simple_unittest(CropOpTest) - add_simple_unittest(DepthwiseConvOpTest) endif() add_simple_unittest(Im2ColTest) add_simple_unittest(GemmConvOpTest) +add_simple_unittest(DepthwiseConvOpTest) endif() add_style_check_target(paddle_function ${h_files}) diff --git a/paddle/function/DepthwiseConvOpTest.cpp b/paddle/function/DepthwiseConvOpTest.cpp index bdace2c372..d8e8c889d5 100644 --- a/paddle/function/DepthwiseConvOpTest.cpp +++ b/paddle/function/DepthwiseConvOpTest.cpp @@ -34,9 +34,13 @@ TEST(DepthwiseConv, BackwardFilter) { } #endif +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + TEST(DepthwiseConv, Forward) { DepthwiseConvolution( "GemmConv-CPU", "NeonDepthwiseConv-CPU", forward); } +#endif + } // namespace paddle From 34a92ab41a407679d454f437f1f3118b81dd1b34 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 14:58:00 +0800 Subject: [PATCH 138/170] ExpandConvLayer adds support of arm-neon acceleration. --- paddle/gserver/layers/ExpandConvLayer.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 0ece279931..0e84581769 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -29,6 +29,10 @@ namespace paddle { REGISTER_LAYER(exconv, ExpandConvLayer); REGISTER_LAYER(exconvt, ExpandConvLayer); +inline bool isDepthwiseConv(int channels, int groups) { + return channels == groups; +} + bool ExpandConvLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { /* Initialize the basic convolutional parent class */ @@ -47,14 +51,23 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, std::vector paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector strides = {(size_t)strideY_[i], (size_t)stride_[i]}; - if (useGpu_ && (size_t)groups_[i] == (size_t)channels_[i] && !isDeconv_) { + // Convolution Layer uses the GemmConv function by default. 
+ convType = "GemmConv"; + convGradInputType = "GemmConvGradInput"; + convGradFilterType = "GemmConvGradFilter"; + + // If depth wise convolution and useGpu == true + if (useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { convType = "DepthwiseConv"; convGradInputType = "DepthwiseConvGradInput"; convGradFilterType = "DepthwiseConvGradFilter"; - } else { - convType = "GemmConv"; - convGradInputType = "GemmConvGradInput"; - convGradFilterType = "GemmConvGradFilter"; + } + + // If depth wise convolution and useGpu == false and ARM-NEON + if (!useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + convType = "NeonDepthwiseConv"; +#endif } if (FLAGS_use_nnpack && !isDeconv_) { From 2710584ff1d5d299361c1b4492d3368ccbdb0378 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 23 Aug 2017 22:05:50 +0800 Subject: [PATCH 139/170] fix above comments --- python/paddle/trainer/config_parser.py | 212 ++++++------------ .../paddle/trainer_config_helpers/layers.py | 76 ++----- .../configs/conv3d_deconv3d_test_config.py | 97 ++++---- 3 files changed, 130 insertions(+), 255 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 49b3c430e7..c0843a7357 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -901,20 +901,14 @@ class Conv3D(Cfg): padding_z=None, stride_z=None): self.add_keys(locals()) - if filter_size_y is None: - self.filter_size_y = filter_size - if padding_y is None: - self.padding_y = padding - if stride_y is None: - self.stride_y = stride + self.filter_size_y = filter_size_y if filter_size_y else filter_size + self.filter_size_z = filter_size_z if filter_size_z else filter_size + self.padding_y = padding_y if padding_y else padding + self.padding_z = padding_z if padding_z else padding + self.stride_y = stride_y if stride_y else stride + self.stride_z = stride_z if stride_z else stride if output_x is not None: config_assert(output_x <= 0) - if filter_size_z is None: - self.filter_size_z = filter_size - if padding_z is None: - self.padding_z = padding - if stride_z is None: - self.stride_z = stride @config_class @@ -1206,10 +1200,10 @@ def get_img_size(input_layer_name, channels): def get_img3d_size(input_layer_name, channels): input = g_layer_map[input_layer_name] img_pixels = input.size / channels - img_size = input.width if input.width > 0 else int(img_pixels**0.5) - img_size_y = input.height if input.height > 0 else int(img_pixels / - img_size) - img_size_z = input.depth if input.depth > 1 else 1 + img_size = input.width + img_size_y = input.height + img_size_z = input.depth + config_assert( img_size * img_size_y * img_size_z == img_pixels, "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" @@ -2000,8 +1994,10 @@ class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' -@config_layer('conv_3d') -class Conv3DLayerBase(LayerBase): +@config_layer('convt') +class ConvTransLayerBase(LayerBase): + layer_type = 'convt' + def __init__(self, name, inputs=[], @@ -2009,7 +2005,7 @@ class Conv3DLayerBase(LayerBase): num_filters=None, shared_biases=False, **xargs): - super(Conv3DLayerBase, self).__init__( + super(ConvTransLayerBase, self).__init__( name, self.layer_type, 0, inputs=inputs, **xargs) if num_filters is not None: @@ -2018,12 +2014,17 @@ class Conv3DLayerBase(LayerBase): use_gpu = int(g_command_config_args.get("use_gpu", 0)) parallel_nn = 
int(g_command_config_args.get("parallel_nn", 0)) - # Automatically select cudnn_type for GPU and exconv for CPU - # if set type=conv, but still reserve the way user specify - # exconv or cudnn_conv manually. - if self.layer_type == "cudnn_conv3d": - config_assert(use_gpu, "cudnn_conv3d only support GPU") + # Automatically select cudnn_type for GPU and exconvt for CPU + # if set type=exconvt, but still reserve the way user specify + # exconvt or cudnn_convt manually. + if self.layer_type == "cudnn_convt": + config_assert(use_gpu, "cudnn_convt only support GPU") + if (use_gpu == 1 and self.layer_type != "exconvt" and + (parallel_nn == 0 or self.config.device > -1)): + self.layer_type = "cudnn_convt" + else: + self.layer_type = "exconvt" # need to specify layer in config self.config.type = self.layer_type @@ -2032,15 +2033,17 @@ class Conv3DLayerBase(LayerBase): for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) + parse_conv( + self.inputs[input_index].conv, + input_layer.name, + self.config.inputs[input_index].conv_conf, + num_filters, + trans=True) conv_conf = self.config.inputs[input_index].conv_conf - parse_conv3d( - self.inputs[input_index].conv, input_layer.name, conv_conf, - num_filters - ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 psize = self.calc_parameter_size(conv_conf) self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, - conv_conf.output_x, self.config.num_filters) + self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size, + self.config.num_filters) psize = self.config.size if shared_biases: @@ -2048,62 +2051,42 @@ class Conv3DLayerBase(LayerBase): self.create_bias_parameter(bias, psize, [psize, 1]) def calc_parameter_size(self, conv_conf): - return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y \ - * conv_conf.filter_size_z) + return conv_conf.channels * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y) - def set_layer_height_width(self, depth, height, width): - self.config.depth = depth - self.config.height = height - self.config.width = width - def set_cnn_layer(self, - input_layer_name, - depth, - height, - width, - channels, - is_print=True): - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(depth, height, width) - if is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) +@config_layer('exconvt') +class ConvTransLayer(ConvTransLayerBase): + layer_type = 'exconvt' -@config_layer('conv3d') -class Conv3DLayer(Conv3DLayerBase): - layer_type = 'conv3d' +@config_layer('cudnn_convt') +class ConvTransLayer(ConvTransLayerBase): + layer_type = 'cudnn_convt' -@config_layer('convt_3d') -class Conv3DTransLayerBase(LayerBase): +@config_layer('conv_3d') +class Conv3DLayerBase(LayerBase): def __init__(self, name, inputs=[], bias=True, num_filters=None, - shared_biases=False, + shared_biases=True, **xargs): - super(Conv3DTransLayerBase, self).__init__( + super(Conv3DLayerBase, self).__init__( name, self.layer_type, 0, inputs=inputs, **xargs) if num_filters is not None: self.config.num_filters = num_filters - use_gpu = int(g_command_config_args.get("use_gpu", 0)) - parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - - # Automatically select cudnn_type for GPU and exconv for CPU - # if set type=conv, but still reserve the way user 
specify - # exconv or cudnn_conv manually. - if self.layer_type == "cudnn_deconv3d": - config_assert(use_gpu, "cudnn_conv3d only support GPU") - # need to specify layer in config self.config.type = self.layer_type + trans = False + if self.config.type == "deconv3d": + trans = True + if shared_biases is not None: self.config.shared_biases = shared_biases @@ -2115,12 +2098,17 @@ class Conv3DTransLayerBase(LayerBase): input_layer.name, conv_conf, num_filters, - trans=True + trans=trans ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 psize = self.calc_parameter_size(conv_conf) self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.img_size_z, conv_conf.img_size_y, - conv_conf.img_size, self.config.num_filters) + if trans: + self.set_cnn_layer(name, conv_conf.img_size_z, + conv_conf.img_size_y, conv_conf.img_size, + self.config.num_filters) + else: + self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, + conv_conf.output_x, self.config.num_filters) psize = self.config.size if shared_biases: @@ -2132,11 +2120,6 @@ class Conv3DTransLayerBase(LayerBase): * (conv_conf.filter_size * conv_conf.filter_size_y \ * conv_conf.filter_size_z) - def set_layer_height_width(self, depth, height, width): - self.config.depth = depth - self.config.height = height - self.config.width = width - def set_cnn_layer(self, input_layer_name, depth, @@ -2146,86 +2129,21 @@ class Conv3DTransLayerBase(LayerBase): is_print=True): size = depth * height * width * channels self.set_layer_size(size) - self.set_layer_height_width(depth, height, width) + self.set_layer_height_width(height, width) + self.set_layer_depth(depth) if is_print: print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % (input_layer_name, channels, depth, height, width, size)) -@config_layer('deconv3d') -class DeConv3DLayer(Conv3DTransLayerBase): - layer_type = 'deconv3d' - - -@config_layer('convt') -class ConvTransLayerBase(LayerBase): - layer_type = 'convt' - - def __init__(self, - name, - inputs=[], - bias=True, - num_filters=None, - shared_biases=False, - **xargs): - super(ConvTransLayerBase, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if num_filters is not None: - self.config.num_filters = num_filters - - use_gpu = int(g_command_config_args.get("use_gpu", 0)) - parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - - # Automatically select cudnn_type for GPU and exconvt for CPU - # if set type=exconvt, but still reserve the way user specify - # exconvt or cudnn_convt manually. 
- if self.layer_type == "cudnn_convt": - config_assert(use_gpu, "cudnn_convt only support GPU") - - if (use_gpu == 1 and self.layer_type != "exconvt" and - (parallel_nn == 0 or self.config.device > -1)): - self.layer_type = "cudnn_convt" - else: - self.layer_type = "exconvt" - # need to specify layer in config - self.config.type = self.layer_type - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - parse_conv( - self.inputs[input_index].conv, - input_layer.name, - self.config.inputs[input_index].conv_conf, - num_filters, - trans=True) - conv_conf = self.config.inputs[input_index].conv_conf - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size, - self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return conv_conf.channels * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) - - -@config_layer('exconvt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'exconvt' +@config_layer('conv3d') +class Conv3DLayer(Conv3DLayerBase): + layer_type = 'conv3d' -@config_layer('cudnn_convt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'cudnn_convt' +@config_layer('deconv3d') +class Conv3DLayer(Conv3DLayerBase): + layer_type = 'deconv3d' @config_layer('norm') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 6953f134c5..e3ae81459f 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6161,12 +6161,6 @@ def img_conv3d_layer(input, param_attr=None, shared_biases=True, layer_attr=None, - filter_size_y=None, - stride_y=None, - padding_y=None, - filter_size_z=None, - stride_z=None, - padding_z=None, trans=False, layer_type=None): """ @@ -6175,7 +6169,7 @@ def img_conv3d_layer(input, .. code-block:: python - conv = img_conv3d_layer(input=data, filter_size=1, filter_size_y=1, + conv = img_conv3d_layer(input=data, filter_size=1, num_channels=8, num_filters=16, stride=1, bias_attr=False, @@ -6185,13 +6179,8 @@ def img_conv3d_layer(input, :type name: basestring :param input: Layer Input. :type input: LayerOutput - :param filter_size: The x dimension of a filter kernel. Or input a tuple for - two image dimension. + :param filter_size: The x dimension of a filter kernel. Or input a list. :type filter_size: int|tuple|list - :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle - currently supports rectangular filters, the filter's - shape will be (filter_size, filter_size_y). - :type filter_size_y: int|None :param num_filters: Each filter group's number of filter :param act: Activation type. Default is tanh :type act: BaseActivation @@ -6200,13 +6189,9 @@ def img_conv3d_layer(input, :param stride: The x dimension of the stride. Or input a tuple for two image dimension. :type stride: int|tuple|list - :param stride_y: The y dimension of the stride. - :type stride_y: int :param padding: The x dimension of the padding. Or input a tuple for two image dimension :type padding: int|tuple|list - :param padding_y: The y dimension of the padding. - :type padding_y: int :param bias_attr: Convolution bias attribute. None means default bias. 
False means no bias. :type bias_attr: ParameterAttribute|False @@ -6233,47 +6218,26 @@ def img_conv3d_layer(input, assert input.num_filters is not None num_channels = input.num_filters - if filter_size_y is None: - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 2 - filter_size, filter_size_y = filter_size - else: - filter_size_y = filter_size - - if filter_size_z is None: - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 2 - filter_size, filter_size_z = filter_size - else: - filter_size_z = filter_size - - if stride_y is None: - if isinstance(stride, collections.Sequence): - assert len(stride) == 2 - stride, stride_y = stride - else: - stride_y = stride - - if stride_z is None: - if isinstance(stride, collections.Sequence): - assert len(stride) == 2 - stride, stride_z = stride - else: - stride_z = stride + if isinstance(filter_size, collections.Sequence): + assert len(filter_size) == 3 + filter_size, filter_size_y, filter_size_z = filter_size + else: + filter_size_y = filter_size + filter_size_z = filter_size - if padding_y is None: - if isinstance(padding, collections.Sequence): - assert len(padding) == 2 - padding, padding_y = padding - else: - padding_y = padding + if isinstance(stride, collections.Sequence): + assert len(stride) == 3 + stride, stride_y, stride_z = stride + else: + stride_y = stride + stride_z = stride - if padding_z is None: - if isinstance(padding, collections.Sequence): - assert len(padding) == 2 - padding, padding_z = padding - else: - padding_z = padding + if isinstance(padding, collections.Sequence): + assert len(padding) == 3 + padding, padding_y, padding_z = padding + else: + padding_y = padding + padding_z = padding if param_attr.attr.get('initial_smart'): # special initial for conv layers. 
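A brief, hedged illustration of the simplified `img_conv3d_layer` interface from this patch: `filter_size`, `stride`, and `padding` now accept either a single int or a three-element `[x, y, z]` list, replacing the separate `*_y`/`*_z` keyword arguments. The sketch below only mirrors the calling conventions visible in this diff and in the test config that follows; the layer names, sizes, and channel count are illustrative assumptions, and it presumes a trainer-config environment where `data_layer` accepts `height`, `width`, and `depth`.

```python
# Minimal sketch (illustrative values) of the two equivalent calling styles.
from paddle.trainer_config_helpers import *

num_channels = 3  # assumed value, not taken from the patch
data = data_layer(
    name='in', size=12096 * num_channels, height=48, width=42, depth=6)

# Scalar form: one value is reused for the x, y and z dimensions.
conv_a = img_conv3d_layer(
    input=data,
    num_channels=num_channels,
    num_filters=16,
    filter_size=3,
    stride=2,
    padding=1,
    layer_type="conv3d",
    act=LinearActivation())

# List form: per-dimension [x, y, z] values passed in a single argument.
conv_b = img_conv3d_layer(
    input=data,
    num_channels=num_channels,
    num_filters=16,
    filter_size=[3, 3, 3],
    stride=[2, 2, 2],
    padding=[1, 1, 1],
    layer_type="conv3d",
    act=LinearActivation())
```

When a list is given, the helper asserts it has exactly three elements and unpacks them in x, y, z order, as the parsing code in the hunk above shows.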
diff --git a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py index da0d23d057..15f7c1d271 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py +++ b/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py @@ -14,23 +14,44 @@ padding_y = 1 padding_z = 1 groups = 1 -data = data_layer( - name='data1', size=12096 * num_channels, height=48, width=42, depth=6) +data1 = data_layer(name='data1', size=2016 * num_channels, height=48, width=42) -conv3d = img_conv3d_layer( +img_conv_layer( + input=data1, + filter_size=filter_size, + num_channels=num_channels, + num_filters=16, + stride=stride, + padding=padding, + act=LinearActivation(), + bias_attr=False) + +data = data_layer( + name='data', size=12096 * num_channels, height=48, width=42, depth=6) +# first +conv3d_1 = img_conv3d_layer( input=data, name='conv3d_1', num_filters=16, num_channels=num_channels, filter_size=filter_size, - filter_size_y=filter_size, - filter_size_z=filter_size, stride=stride, - stride_y=stride_y, - stride_z=stride_z, padding=padding, - padding_y=padding_y, - padding_z=padding_z, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=False, + layer_type="conv3d", + act=LinearActivation()) +# second +conv3d_2 = img_conv3d_layer( + input=data, + name='conv3d_2', + num_filters=16, + num_channels=num_channels, + filter_size=[filter_size, filter_size_y, filter_size_z], + stride=[stride, stride_y, stride_z], + padding=[padding, padding_y, padding_z], groups=groups, bias_attr=True, shared_biases=True, @@ -38,61 +59,33 @@ conv3d = img_conv3d_layer( layer_type="conv3d", act=LinearActivation()) -deconv3d = img_conv3d_layer( +# first +deconv3d_1 = img_conv3d_layer( input=data, name='deconv3d_1', num_filters=16, num_channels=num_channels, filter_size=filter_size, - filter_size_y=filter_size, - filter_size_z=filter_size, stride=stride, - stride_y=stride_y, - stride_z=stride_z, padding=padding, - padding_y=padding_y, - padding_z=padding_z, groups=groups, bias_attr=True, shared_biases=True, - trans=True, + trans=False, layer_type="deconv3d", act=LinearActivation()) - -data = data_layer(name="input", size=8 * 16 * 16) -conv1 = img_conv_layer( - input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - stride=1, - bias_attr=False, - act=ReluActivation(), - layer_type="exconv") -conv2 = img_conv_layer( - input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - stride=1, - bias_attr=False, - act=ReluActivation(), - layer_type="exconv") - -concat = concat_layer(input=[conv1, conv2]) - -conv = img_conv_layer( +# second +deconv3d_2 = img_conv3d_layer( input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, + name='deconv3d_2', num_filters=16, - stride=1, + num_channels=num_channels, + filter_size=[filter_size, filter_size_y, filter_size_z], + stride=[stride, stride_y, stride_z], + padding=[padding, padding_y, padding_z], + groups=groups, bias_attr=True, - act=LinearActivation(), - groups=2, - layer_type="exconv") - -outputs(concat, conv) + shared_biases=True, + trans=False, + layer_type="deconv3d", + act=LinearActivation()) From e63ad0a6bdb36967d417633a074e0e966ca55e78 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 28 Aug 2017 15:15:26 +0800 Subject: [PATCH 140/170] HuberRegressionLoss and HuberTwoClassification support multi-dimension data --- paddle/gserver/layers/CostLayer.cpp | 67 
++++++++++++++++++----------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 7f648070f2..aa4a26a83f 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -611,22 +611,26 @@ void HuberRegressionLoss::forwardImp(Matrix& output, Matrix& target) { HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); + size_t dim = output.getWidth(); CHECK(label.value); CHECK_EQ((*label.value).getHeight(), numSamples); CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), (*label.value).getWidth()); + CHECK_EQ(dim, (*label.value).getWidth()); CHECK_EQ(target.getWidth(), (size_t)1); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); real* lbl = useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); - std::vector cost(numSamples); + std::vector cost(numSamples, 0); for (size_t i = 0; i < numSamples; ++i) { - real a = std::abs(lbl[i] - out[i]); - if (a <= delta_) - cost[i] = a * a / 2; - else - cost[i] = delta_ * (a - delta_ / 2); + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = std::abs(lbl[index] - out[index]); + if (a <= delta_) + cost[i] += a * a / 2; + else + cost[i] += delta_ * (a - delta_ / 2); + } } target.copyFrom(cost.data(), numSamples); } @@ -635,18 +639,22 @@ void HuberRegressionLoss::backwardImp(Matrix& output, Argument& label, Matrix& outputG) { size_t numSamples = output.getHeight(); + size_t dim = output.getWidth(); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); real* lbl = useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { - real a = lbl[i] - out[i]; - if (std::abs(a) <= delta_) - grad[i] += -a; - else - grad[i] += a > 0 ? -delta_ : delta_; + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = lbl[index] - out[index]; + if (std::abs(a) <= delta_) + grad[index] += -a; + else + grad[index] += a > 0 ? -delta_ : delta_; + } } - if (useGpu_) outputG.copyFrom(grad, numSamples); + if (useGpu_) outputG.copyFrom(grad, numSamples * dim); } // @@ -664,23 +672,25 @@ void HuberTwoClassification::forwardImp(Matrix& output, Matrix& target) { HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); + size_t dim = output.getWidth(); CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), (size_t)1); CHECK_EQ(target.getWidth(), (size_t)1); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); int* lbl = useGpu_ ? 
tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); - std::vector cost(numSamples); + std::vector cost(numSamples, 0); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - if (out[i] * y < -1) - cost[i] = -4 * out[i] * y; - else if (out[i] * y < 1) - cost[i] = (1 - out[i] * y) * (1 - out[i] * y); - else - cost[i] = 0; + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = out[index] * y; + if (a < -1) + cost[i] += -4 * a; + else if (a < 1) + cost[i] += (1 - a) * (1 - a); + } } target.copyFrom(cost.data(), numSamples); } @@ -689,17 +699,22 @@ void HuberTwoClassification::backwardImp(Matrix& output, Argument& label, Matrix& outputG) { size_t numSamples = output.getHeight(); + size_t dim = output.getWidth(); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - if (y * out[i] < -1) - grad[i] += -4 * y; - else if (y * out[i] < 1) - grad[i] += -2 * (1 - y * out[i]) * y; + for (size_t j = 0; j < dim; ++j) { + int index = i * dim + j; + real a = out[index] * y; + if (a < -1) + grad[index] += -4 * y; + else if (a < 1) + grad[index] += -2 * (1 - a) * y; + } } - if (useGpu_) outputG.copyFrom(grad, numSamples); + if (useGpu_) outputG.copyFrom(grad, numSamples * dim); } /** * This cost layer compute the sum of its input as loss. From b1c0bad9fe8258ac9c12141c07fddb8600f781c5 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Sat, 26 Aug 2017 13:09:05 +0800 Subject: [PATCH 141/170] Add config parser for pooling3D --- paddle/math/Matrix.cpp | 2 - proto/ModelConfig.proto | 1 + python/paddle/trainer/config_parser.py | 120 +++++++++++++- .../paddle/trainer_config_helpers/layers.py | 146 +++++++++++++++++- .../tests/configs/test_pooling3D_layer.py | 38 +++++ .../tests/layers_test.py | 2 +- 6 files changed, 304 insertions(+), 5 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 54c2eae475..e93a154556 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -2255,9 +2255,7 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad, real* tgtGrad = getData(); real* otGrad = outGrad.getData(); real* maxPoolIdxData = maxPoolIdx.getData(); - size_t outStride = outGrad.getStride(); - ; for (size_t n = 0; n < num; ++n) { if (!outGrad.isContiguous()) { diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 42cf10e9d3..259f3c33c3 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -495,6 +495,7 @@ message LayerConfig { // to indicate rectangle image data optional uint64 height = 50; optional uint64 width = 51; + optional uint64 depth = 57 [ default = 1 ]; // blank label used in ctc loss optional uint32 blank = 52 [ default = 0 ]; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7b696ef0c..405c5e1f13 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -903,6 +903,31 @@ class Pool(Cfg): self.add_keys(locals()) +@config_class +class Pool3d(Cfg): + def __init__( + self, + pool_type, + channels, + size_x, + size_y=None, + size_z=None, + start=None, + stride=None, # 1 by defalut in protobuf + stride_y=None, + stride_z=None, + padding=None, # 0 by defalut in protobuf + padding_y=None, + 
padding_z=None): + self.add_keys(locals()) + self.filter_size_y = size_y if size_y else size_x + self.filter_size_z = size_z if size_z else size_x + self.padding_y = padding_y if padding_y else padding + self.padding_z = padding_z if padding_z else padding + self.stride_y = stride_y if stride_y else stride + self.stride_z = stride_z if stride_z else stride + + @config_class class SpatialPyramidPool(Cfg): def __init__(self, pool_type, pyramid_height, channels): @@ -1167,6 +1192,20 @@ def get_img_size(input_layer_name, channels): return img_size, img_size_y +def get_img3d_size(input_layer_name, channels): + input = g_layer_map[input_layer_name] + img_pixels = input.size / channels + img_size = input.width + img_size_y = input.height + img_size_z = input.depth + + config_assert( + img_size * img_size_y * img_size_z == img_pixels, + "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" + % (input_layer_name, img_size, img_size_y, img_size_z, img_pixels)) + return img_size, img_size_y, img_size_z + + def parse_bilinear(bilinear, input_layer_name, bilinear_conf): parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) bilinear_conf.out_size_x = bilinear.out_size_x @@ -1204,6 +1243,45 @@ def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): pool_conf.stride_y, not ceil_mode) +def parse_pool3d(pool, input_layer_name, pool_conf, ceil_mode): + pool_conf.pool_type = pool.pool_type + config_assert(pool.pool_type in ['max-projection', 'avg-projection'], + "pool-type %s is not in " + "['max-projection', 'avg-projection']" % pool.pool_type) + + pool_conf.channels = pool.channels + + pool_conf.size_x = pool.size_x + pool_conf.stride = pool.stride + pool_conf.padding = pool.padding + + pool_conf.size_y = default(pool.size_y, pool_conf.size_x) + pool_conf.size_z = default(pool.size_z, pool_conf.size_x) + pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) + pool_conf.stride_z = default(pool.stride_z, pool_conf.stride) + pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) + pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) + + pool_conf.img_size, pool_conf.img_size_y, pool_conf.img_size_z = \ + get_img3d_size(input_layer_name, pool.channels) + + config_assert(not pool.start, "start is deprecated in pooling.") + + if pool.padding is not None: + pool_conf.padding = pool.padding + pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) + pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) + pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, + pool_conf.padding, pool_conf.stride, + not ceil_mode) + pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, + pool_conf.padding_y, + pool_conf.stride_y, not ceil_mode) + pool_conf.output_z = cnn_output_size(pool_conf.img_size_z, pool_conf.size_z, + pool_conf.padding_z, + pool_conf.stride_z, not ceil_mode) + + def parse_spp(spp, input_layer_name, spp_conf): parse_image(spp, input_layer_name, spp_conf.image_conf) spp_conf.pool_type = spp.pool_type @@ -1580,6 +1658,9 @@ class LayerBase(object): self.config.height = height self.config.width = width + def set_layer_depth(self, depth): + self.config.depth = depth + def set_cnn_layer(self, input_layer_name, height, @@ -1763,11 +1844,19 @@ class DetectionOutputLayer(LayerBase): @config_layer('data') class DataLayer(LayerBase): - def __init__(self, name, size, height=None, width=None, device=None): + def __init__(self, + name, + size, + depth=None, + height=None, + width=None, 
+ device=None): super(DataLayer, self).__init__( name, 'data', size, inputs=[], device=device) if height and width: self.set_layer_height_width(height, width) + if depth: + self.set_layer_depth(depth) ''' @@ -1995,6 +2084,35 @@ class PoolLayer(LayerBase): pool_conf.channels) +@config_layer('pool3d') +class Pool3DLayer(LayerBase): + def __init__(self, name, inputs, ceil_mode=True, **xargs): + super(Pool3DLayer, self).__init__( + name, 'pool3d', 0, inputs=inputs, **xargs) + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + pool_conf = self.config.inputs[input_index].pool_conf + parse_pool3d(self.inputs[input_index].pool, input_layer.name, + pool_conf, ceil_mode) + self.set_cnn_layer(name, pool_conf.output_z, pool_conf.output_y, + pool_conf.output_x, pool_conf.channels) + + def set_cnn_layer(self, + input_layer_name, + depth, + height, + width, + channels, + is_print=True): + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(height, width) + self.set_layer_depth(depth) + if is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + + @config_layer('spp') class SpatialPyramidPoolLayer(LayerBase): def __init__(self, name, inputs, **xargs): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1bc55c8696..5c5e737b56 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -133,6 +133,7 @@ __all__ = [ 'clip_layer', 'slice_projection', 'kmax_sequence_score_layer', + 'img_pool3d_layer', ] @@ -161,6 +162,7 @@ class LayerType(object): EXCONVTRANS_LAYER = 'exconvt' CUDNNCONV_LAYER = 'cudnn_conv' POOL_LAYER = 'pool' + POOL3D_LAYER = 'pool3d' BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' @@ -878,7 +880,8 @@ def mixed_layer(size=0, @layer_support() -def data_layer(name, size, height=None, width=None, layer_attr=None): +def data_layer(name, size, depth=None, height=None, width=None, + layer_attr=None): """ Define DataLayer For NeuralNetwork. @@ -905,6 +908,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): type=LayerType.DATA, name=name, size=size, + depth=depth, height=height, width=width, **ExtraLayerAttribute.to_kwargs(layer_attr)) @@ -2610,6 +2614,146 @@ def img_pool_layer(input, size=l.config.size) +@wrap_name_default("pool3d") +@layer_support() +def img_pool3d_layer(input, + pool_size, + name=None, + num_channels=None, + pool_type=None, + stride=1, + padding=0, + layer_attr=None, + pool_size_y=None, + stride_y=None, + padding_y=None, + pool_size_z=None, + stride_z=None, + padding_z=None, + ceil_mode=True): + """ + Image pooling Layer. + + The details of pooling layer, please refer ufldl's pooling_ . + + .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ + + - ceil_mode=True: + + .. math:: + + w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + d = 1 + int(ceil(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) + + - ceil_mode=False: + + .. 
math:: + + w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + d = 1 + int(floor(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) + + The example usage is: + + .. code-block:: python + + maxpool = img_pool3d_layer(input=conv, + pool_size=3, + num_channels=8, + stride=1, + padding=1, + pool_type=MaxPooling()) + + :param padding: pooling padding width. + :type padding: int|tuple|list + :param name: name of pooling layer + :type name: basestring. + :param input: layer's input + :type input: LayerOutput + :param pool_size: pooling window width + :type pool_size: int|tuple|list + :param num_channels: number of input channel. + :type num_channels: int + :param pool_type: pooling type. MaxPooling or AvgPooling. Default is + MaxPooling. + :type pool_type: BasePoolingType + :param stride: stride width of pooling. + :type stride: int|tuple|list + :param layer_attr: Extra Layer attribute. + :type layer_attr: ExtraLayerAttribute + :param ceil_mode: Wether to use ceil mode to calculate output height and with. + Defalut is True. If set false, Otherwise use floor. + + :type ceil_mode: bool + :return: LayerOutput object. + :rtype: LayerOutput + """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + + if pool_type is None: + pool_type = MaxPooling() + elif isinstance(pool_type, AvgPooling): + pool_type.name = 'avg' + + type_name = pool_type.name + '-projection' \ + if ( + isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ + else pool_type.name + + if isinstance(pool_size, collections.Sequence): + assert len(pool_size) == 3 + pool_size, pool_size_y, pool_size_z = pool_size + else: + pool_size_y = pool_size + pool_size_z = pool_size + + if isinstance(stride, collections.Sequence): + assert len(stride) == 3 + stride, stride_y, stride_z = stride + else: + stride_y = stride + stride_z = stride + + if isinstance(padding, collections.Sequence): + assert len(padding) == 3 + padding, padding_y, padding_y = padding + else: + padding_y = padding + padding_z = padding + + l = Layer( + name=name, + type=LayerType.POOL3D_LAYER, + inputs=[ + Input( + input.name, + pool=Pool3d( + pool_type=type_name, + channels=num_channels, + size_x=pool_size, + start=None, + stride=stride, + padding=padding, + size_y=pool_size_y, + stride_y=stride_y, + padding_y=padding_y, + size_z=pool_size_z, + stride_z=stride_z, + padding_z=padding_z)) + ], + ceil_mode=ceil_mode, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, + LayerType.POOL_LAYER, + parents=[input], + num_filters=num_channels, + size=l.config.size) + + @wrap_name_default("spp") @layer_support() def spp_layer(input, diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py new file mode 100644 index 0000000000..0dbb921d41 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py @@ -0,0 +1,38 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=100, learning_rate=1e-5) + +data_2d = data_layer(name='data_2d', size=6000, height=20, width=10) + +pool_2d = img_pool_layer( + name="pool___2d", + input=data_2d, + num_channels=30, + pool_size=5, + stride=3, + padding=1, + pool_type=AvgPooling()) +outputs(pool_2d) + +data_3d = data_layer( + name='data_3d_1', size=60000, depth=10, height=20, width=10) 
+ +pool_3d_1 = img_pool3d_layer( + name="pool_3d_1", + input=data_3d, + num_channels=30, + pool_size=5, + stride=3, + padding=1, + pool_type=AvgPooling()) +outputs(pool_3d_1) + +pool_3d_2 = img_pool3d_layer( + name="pool_3d_2", + input=data_3d, + num_channels=30, + pool_size=[5, 5, 5], + stride=[3, 3, 3], + padding=[1, 1, 1], + pool_type=MaxPooling()) +outputs(pool_3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 05902ea293..52218972bf 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,4 +16,4 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/layers_test_config.py', '') + 'trainer_config_helpers/tests/configs/test_pooling3D_layer.py', '') From 6053f7e36b19a06da14c970a1e4f25a02d1dbcaf Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 28 Aug 2017 18:10:44 +0800 Subject: [PATCH 142/170] fix previous comments(c++) --- paddle/cuda/include/hl_matrix.h | 2 +- paddle/gserver/layers/Conv3DLayer.cpp | 6 ------ paddle/gserver/layers/DeConv3DLayer.cpp | 6 ------ 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index a37921b749..c7f2510997 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -241,7 +241,7 @@ extern void hl_matrix_rotate( * @param[in] paddingD padding in the depth. * @param[in] paddingH padding in the height. * @param[in] paddingW padding in the width. - * @param[out] matDst output matrix. + * @param[out] dataDst output matrix. * */ extern void hl_matrix_vol2Col(const real* dataSrc, diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index db907bbab1..7cc9937cce 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -53,18 +53,12 @@ bool Conv3DLayer::init(const LayerMap &layerMap, size_t Conv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); - // imgSizeH_.clear(); - // imgSizeW_.clear(); - // imgSizeD_.clear(); outputH_.clear(); outputW_.clear(); outputD_.clear(); N_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); outputW_.push_back(outputSize( imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); outputH_.push_back(outputSize( diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index b18c06e36c..7d5c772c89 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -53,9 +53,6 @@ bool DeConv3DLayer::init(const LayerMap &layerMap, size_t DeConv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); - // imgSizeH_.clear(); - // imgSizeW_.clear(); - // imgSizeD_.clear(); outputH_.clear(); outputW_.clear(); outputD_.clear(); @@ -63,9 +60,6 @@ size_t DeConv3DLayer::getSize() { NOut_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - // 
imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); outputW_.push_back( imageSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); outputH_.push_back(imageSize( From 5df384d67ff498c9438b2ef7dc9566af7d50c97a Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 28 Aug 2017 19:36:18 +0800 Subject: [PATCH 143/170] Remove NeonDepthwiseConv.h --- paddle/function/neon/NeonDepthwiseConv.h | 25 ------------------------ 1 file changed, 25 deletions(-) delete mode 100644 paddle/function/neon/NeonDepthwiseConv.h diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/function/neon/NeonDepthwiseConv.h deleted file mode 100644 index 23e4be1921..0000000000 --- a/paddle/function/neon/NeonDepthwiseConv.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -namespace paddle { - -namespace neon { - -template -struct DepthwiseConvKernel {}; - -} // namespace neon -} // namespace paddle From 4f0c071e4909ff041f3a86c3a40c482becf50845 Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 28 Aug 2017 22:18:11 +0800 Subject: [PATCH 144/170] refine backward --- paddle/framework/backward.cc | 5 ++++- paddle/operators/net_op.cc | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index bfda18724c..6b4c612cd8 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -124,6 +124,9 @@ static std::unique_ptr BackwardRecursive( std::list insert_position; for (auto& dup_output_op : dup_output_ops) { const std::string& name = dup_output_op.first; + // duplicate @Empty@ don't need to be added + if (name == kEmptyVarName) continue; + auto& dup_op = dup_output_op.second; // no duplicate output if (dup_op.size() == 1) continue; @@ -209,7 +212,7 @@ std::unique_ptr Backward( const OperatorBase& forwardOp, const std::unordered_set& no_grad_vars) { std::unordered_set no_grad_names; - no_grad_names.reserve(no_grad_vars.size()); + no_grad_names.reserve(no_grad_vars.size() + 1); no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index 44d925f0b0..78b5e27678 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -31,10 +31,13 @@ void NetOp::CompleteAddOp(bool calc) { for (auto& op : ops_) { for (auto& ipt : op->Inputs()) { for (auto& var_name : ipt.second) { - if (!Contains(output_set, var_name)) { // Not other op's output - input_set.insert(var_name); - } else { + // If input variable has been in output set, then it will be + // added into intermediate_outputs_. Otherwise, it will be + // added into input set. 
+ if (Contains(output_set, var_name)) { intermediate_outputs_.insert(var_name); + } else { + input_set.insert(var_name); } } } From 980edfa69a72f57dea689d1d5b1bff6b388e7a71 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 28 Aug 2017 11:34:24 -0700 Subject: [PATCH 145/170] Refine backward document --- paddle/framework/backward.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 133b17c7be..ce324a73f0 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -6,9 +6,16 @@ In Neural Network, the backpropagation algorithm follows the chain rule, so we n ## Backward Operator Registry -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. In most cases, there is a one-to-one correspondence between forward and backward operators. We use registry mechanism to save these correspondences. +A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. -For example, we have got a `add_two_op`, and is registered by the following code: +-| | forward operator | backward operator +-| ---------------------- | ---------------- |------------------------- | +-| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | +-| **Operator::outputs_** | Outputs | InputGradients | + + In most cases, there is a one-to-one correspondence between forward and backward operators. These correspondences are recorded by a global hash map(`OpInfoMap`). To follow the philosophy of minimum core and make operators pluggable, the registry mechanism is introduced. + +For example, we have got a `add_two_op`, and we can register it's information and corresponding backward operator by the following macro: ```cpp REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker, add_two_grad, AddTwoGradOp); From eaeb69f98f70bbea4fe4aae9f7c7b830f75959c5 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 28 Aug 2017 13:47:37 -0700 Subject: [PATCH 146/170] Follow reviewer's comments --- paddle/framework/backward.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index ce324a73f0..8aa6728a95 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -2,28 +2,28 @@ ## Motivation -In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/expression's backward pass will be generated respect to forward pass. +In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation graph, the operator/expression's backward pass will be generated respect to forward pass. ## Backward Operator Registry A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. 
--| | forward operator | backward operator --| ---------------------- | ---------------- |------------------------- | --| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | --| **Operator::outputs_** | Outputs | InputGradients | +| | forward operator | backward operator +| ---------------------- | ---------------- |------------------------- | +| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | +| **Operator::outputs_** | Outputs | InputGradients | In most cases, there is a one-to-one correspondence between forward and backward operators. These correspondences are recorded by a global hash map(`OpInfoMap`). To follow the philosophy of minimum core and make operators pluggable, the registry mechanism is introduced. -For example, we have got a `add_two_op`, and we can register it's information and corresponding backward operator by the following macro: +For example, we have got a `mul_op`, and we can register it's information and corresponding backward operator by the following macro: ```cpp -REGISTER_OP(add_two, AddTwoOp, AddTwoOpMaker, add_two_grad, AddTwoGradOp); +REGISTER_OP(mul, MulOp, MulOpMaker, mul_grad, MulOpGrad); ``` -`add_two` is the operator's type. `AddTwoOp` and `AddTwoOpMaker` are the operator class and the operator maker class respectively. +`mul` is the operator's type. `MulOp` and `MulOpMaker` are the operator class and the operator maker class respectively. -`add_two_grad` is the type of backward operator, and `AddTwoGradOp` is its class name. +`mul_grad` is the type of backward operator, and `MulOpGrad` is its class name. ## Backward Opeartor Creating From c19eae4c8e7923aa52dc05560dcc91b8b6d58de8 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 29 Aug 2017 15:46:52 +0800 Subject: [PATCH 147/170] update doc about how to write new operators. 
--- doc/howto/dev/new_op_cn.md | 56 +++++++++++++------ .../v2/framework/tests/gradient_checker.py | 2 +- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index ebd2cf3ff0..228b3fd643 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -5,12 +5,13 @@ - [定义ProtoMaker类](#定义ProtoMaker类) - [定义Operator类](#定义Operator类) - [定义OpKernel类](#定义OpKernel类) - - [注册类](#注册类) + - [注册Operator](#注册Operator) - [编译](#编译) - [绑定Python](#绑定Python) - [实现单元测试](#实现单元测试) - [前向Operator单测](#前向Operator单测) - [反向Operator单测](#反向Operator单测) + - [编译和执行](#编译和执行) ## 概念简介 @@ -22,19 +23,17 @@ - `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 - `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 -依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结如下: +依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下: -Forward Op需要包含: - - - OpProtoMake定义 - - Op定义 - - Kernel实现 + + 内容 | 定义位置 +-------------- | :---------------------- +OpProtoMake定义 | `.cc`文件,Backward Op不需要定义OpProtoMake +Op定义 | `.cc`文件 +Kernel实现 | CPU、GPU共享Kernel在`.h`文件,否则,CPU可以在`.cc`文件,GPU可在`.cu`文件。 +注册Op | Op注册在`.cc`文件;Kernel注册CPU在`.cc`文件,GPU在`.cu`文件 + -与之对应的Backward Op包含: - - - Op定义 - - Kernel实现 - 下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 @@ -137,8 +136,9 @@ MulOp(const std::string &type, const framework::VariableNameMap &inputs, ``` 还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: - - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法 - - 2). 设置输出Tensor的形状 + + - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法。 + - 2). 设置输出Tensor的形状。 通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 @@ -172,7 +172,7 @@ class MulKernel : public framework::OpKernel { 到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。 -### 4. 注册类 +### 4. 注册Operator 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 @@ -297,4 +297,28 @@ class TestMulOp(unittest.TestCase): - 调用`create_op("mul")`创建反向Op对应的前向Op。 - 定义输入`inputs`。 - 调用`compare_grad`函数对比CPU、GPU计算结果。 - - 调用`check_grad`检查梯度稳定性。 + - 调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。 + - 第一个参数`op` : 前向op。 + - 第二个参数`inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。 + - 第三个参数`set(["X", "Y"])` : 指定对输入变量`X`、`Y`做梯度检测。 + - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` + + +### 编译和执行 + +单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加编译: + +``` +py_test(test_mul_op SRCS test_mul_op.py) +``` + +编译完成之后即可执行单测: + +``` +make test ARGS="-R test_mul_op -V" +``` +或者: + +``` +ctest -R test_mul_op +``` diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 9a7a7fbf5e..02cfb9b2c4 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -268,7 +268,7 @@ class GradientChecker(unittest.TestCase): :param input_vars: numpy value of input variable. The following computation will use these variables. :param inputs_to_check: inputs var names that should check gradient. - :param output_name: output name that used to + :param output_name: the final output variable name. :param max_relative_error: The relative tolerance parameter. 
:param no_grad_set: used when create backward ops :param only_cpu: only compute and check gradient on cpu kernel. From b336119424d3fc0d9ffa39688612a83c23c6e10e Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 29 Aug 2017 16:03:07 +0800 Subject: [PATCH 148/170] Add WITH_TESTING=ON for cmake in the operators writing guide doc. --- doc/howto/dev/new_op_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 228b3fd643..7f8da2da5a 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -312,7 +312,7 @@ class TestMulOp(unittest.TestCase): py_test(test_mul_op SRCS test_mul_op.py) ``` -编译完成之后即可执行单测: +编译时需要打开`WITH_TESTING`, 即 `cmake paddle_dir -DWITH_TESTING=ON`,编译成功之后执行单测命令为: ``` make test ARGS="-R test_mul_op -V" From b709af616f99c7f4e3ab300297608054638886a8 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 29 Aug 2017 16:21:45 +0800 Subject: [PATCH 149/170] HuberTwoClassification only support one dimension --- paddle/gserver/layers/CostLayer.cpp | 31 +++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index aa4a26a83f..ce071323ff 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -672,10 +672,10 @@ void HuberTwoClassification::forwardImp(Matrix& output, Matrix& target) { HuberCost::forwardImp(output, label, target); size_t numSamples = target.getHeight(); - size_t dim = output.getWidth(); CHECK(label.ids); CHECK_EQ((*label.ids).getSize(), numSamples); CHECK_EQ(output.getHeight(), numSamples); + CHECK_EQ(output.getWidth(), (size_t)1); CHECK_EQ(target.getWidth(), (size_t)1); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); @@ -683,14 +683,11 @@ void HuberTwoClassification::forwardImp(Matrix& output, std::vector cost(numSamples, 0); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - for (size_t j = 0; j < dim; ++j) { - int index = i * dim + j; - real a = out[index] * y; - if (a < -1) - cost[i] += -4 * a; - else if (a < 1) - cost[i] += (1 - a) * (1 - a); - } + real a = out[i] * y; + if (a < -1) + cost[i] = -4 * a; + else if (a < 1) + cost[i] = (1 - a) * (1 - a); } target.copyFrom(cost.data(), numSamples); } @@ -699,22 +696,18 @@ void HuberTwoClassification::backwardImp(Matrix& output, Argument& label, Matrix& outputG) { size_t numSamples = output.getHeight(); - size_t dim = output.getWidth(); real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); for (size_t i = 0; i < numSamples; ++i) { int y = 2 * lbl[i] - 1; - for (size_t j = 0; j < dim; ++j) { - int index = i * dim + j; - real a = out[index] * y; - if (a < -1) - grad[index] += -4 * y; - else if (a < 1) - grad[index] += -2 * (1 - a) * y; - } + real a = out[i] * y; + if (a < -1) + grad[i] += -4 * y; + else if (a < 1) + grad[i] += -2 * (1 - a) * y; } - if (useGpu_) outputG.copyFrom(grad, numSamples * dim); + if (useGpu_) outputG.copyFrom(grad, numSamples); } /** * This cost layer compute the sum of its input as loss. From 751d8533e830eae0ca6b9ee6e62e771a4b72a14b Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 29 Aug 2017 16:45:20 +0800 Subject: [PATCH 150/170] follow wuyi's comments. 
--- python/paddle/v2/framework/tests/gradient_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 02cfb9b2c4..518f828bac 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -268,7 +268,7 @@ class GradientChecker(unittest.TestCase): :param input_vars: numpy value of input variable. The following computation will use these variables. :param inputs_to_check: inputs var names that should check gradient. - :param output_name: the final output variable name. + :param output_name: the output variable name of forward network. :param max_relative_error: The relative tolerance parameter. :param no_grad_set: used when create backward ops :param only_cpu: only compute and check gradient on cpu kernel. From bfbd066fdd1c4a81266864bf837d89742b3f2ad6 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 29 Aug 2017 19:55:44 +0800 Subject: [PATCH 151/170] refine --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 117 ++++++++++++------------ paddle/gserver/layers/MKLDNNFcLayer.h | 2 + paddle/gserver/layers/MKLDNNLayer.h | 48 +++++++--- paddle/math/MKLDNNMatrix.cpp | 25 ++--- paddle/math/MKLDNNMatrix.h | 29 +++--- 5 files changed, 118 insertions(+), 103 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d38e6a2099..a08cca318e 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -77,6 +77,24 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } +void MKLDNNFcLayer::convertOutputToOtherDevice() { + copyOutputInfoToOtherDevice(); + // find other cpu device and reorder output to cpu device + int cnt = 0; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + // fc cpu output value do not need convert + // just share point + outputOtherDevice_[i].value = output_.value; + ++cnt; + } + } + + if (cnt > 1) { + LOG(WARNING) << "should not have more than one CPU devie"; + } +} + void MKLDNNFcLayer::reshape() { const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int batchSize = input.getBatchSize(); @@ -116,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() { const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (prevIsMKLDNN()) { + if (prevIsOnlyMKLDNN()) { const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; @@ -136,30 +154,21 @@ void MKLDNNFcLayer::resetFwd() { // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); - if (!nextIsMKLDNN()) { - Argument cpuOutput; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { - cpuOutput = outputOtherDevice_[i]; - } - } - cpuOutput.setFrameHeight(output_.getFrameHeight()); - cpuOutput.setFrameWidth(output_.getFrameWidth()); - - // fc cpu output value do not need convert - cpuOutput.value = output_.value; + if (!nextIsOnlyMKLDNN()) { + convertOutputToOtherDevice(); } // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = - hasBias ? 
fc_fwd::desc(pk, - inVal_->getMD(), - wgtVal_->getMD(), - biasVal_->getMD(), - outVal_->getMD()) - : fc_fwd::desc( - pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); + fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + biasVal_->getMemoryDesc(), + outVal_->getMemoryDesc()) + : fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + outVal_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); @@ -184,36 +193,38 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; - // TODO(TJ): merge topdiffs - if (nextIsMKLDNN()) { + // TODO(TJ): merge outgrad + if (nextIsOnlyMKLDNN()) { // can not directly cast outputgrad to mkldnnmatrix, // since each layer can not write the inputgrad to mkldnn inputgrad. // So just create from matrix with outputvalue format. const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } else { const MatrixPtr& out = getOutput(CPU_DEVICE).grad; // fc do not need to convert from cpu device since output always nc // only need create from cpu device - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } - wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPD()); - biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPD()) : nullptr; + wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); + biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) + : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, - inVal_->getMD(), - wgtGrad_->getMD(), - outGrad_->getMD()); + inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = - hasBias ? fc_bwdWgt::desc(inVal_->getMD(), - wgtGrad_->getMD(), - biasGrad_->getMD(), - outGrad_->getMD()) - : fc_bwdWgt::desc( - inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); + fc_bwdWgt::desc bwdWgtDesc = hasBias + ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + biasGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()) + : fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); @@ -227,30 +238,20 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - if (prevIsMKLDNN()) { - const MatrixPtr& in = getInputGrad(0, MKLDNN_DEVICE); - if (in == nullptr) { - return; - } - if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways when merge topdiff done - } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); - } + int device = prevIsOnlyMKLDNN() ? 
MKLDNN_DEVICE : CPU_DEVICE; + const MatrixPtr& in = getInputGrad(0, device); + if (in == nullptr) { + return; + } + if (getInput(0, device).getAllCount() > 1) { + // TODO(TJ): use outputMaps_ ways when merge outgrad done } else { - const MatrixPtr& in = getInputGrad(0, CPU_DEVICE); - if (in == nullptr) { - return; - } - if (getInput(0, CPU_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways when merge topdiff done - } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); - } + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc()); } - fc_bwdData::desc bwdDataDesc = - fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index e2657a8d5e..e138a6faf1 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -72,6 +72,8 @@ protected: * only would be called when needed */ void resetBwd(); + + void convertOutputToOtherDevice() override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 3dd17a36ff..8fe9630e82 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -86,10 +86,7 @@ public: CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." << "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; - if (useGpu_ == true) { - LOG(WARNING) << "Do not support GPU yet, will change to useGpu = false"; - useGpu_ = false; - } + CHECK(!useGpu_) << "Do not support GPU yet"; // set device id before Layer::init setDevice(MKLDNN_DEVICE); @@ -116,6 +113,12 @@ public: */ virtual void convertWeightsToPaddle() {} + /** + * convert MKLDNN output to other device. + * only support CPU device yet + */ + virtual void convertOutputToOtherDevice() {} + /** * print info about sizes */ @@ -147,22 +150,25 @@ public: protected: /** - * If next layer only has MKLDNN type. - * Otherwise, only support otherdevice CPU device. + * copy image size and sequence info to other device */ - bool nextIsMKLDNN() { + void copyOutputInfoToOtherDevice() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) - << "Only support other device is CPU yet"; + outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight()); + outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth()); + outputOtherDevice_[i].sequenceStartPositions = + output_.sequenceStartPositions; + outputOtherDevice_[i].subSequenceStartPositions = + output_.subSequenceStartPositions; + outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; } - return outputOtherDevice_.size() == 0; } /** - * Is previous layer MKLDNN type. - * Otherwise, only support otherdevice CPU device. + * Is previous layer only has MKLDNN type. + * Otherwise, only support the previous layer using CPU device. */ - bool prevIsMKLDNN(int index = 0) { + bool prevIsOnlyMKLDNN(int index = 0) { int prevDevice = getPrev(index)->getDeviceId(); if (prevDevice == MKLDNN_DEVICE) { return true; @@ -173,11 +179,23 @@ protected: } } + /** + * If output only has MKLDNN device. + * Otherwise, other devices should only using CPU device. 
+ */ + bool nextIsOnlyMKLDNN() { + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) + << "Only support other device is CPU yet"; + } + return outputOtherDevice_.size() == 0; + } + /** * Sync input value data */ void syncInputValue() { - if (prevIsMKLDNN()) { + if (prevIsOnlyMKLDNN()) { return; } real* iData = getInputValue(0, CPU_DEVICE)->getData(); @@ -190,7 +208,7 @@ protected: * Sync output grad data */ void syncOutputGrad() { - if (nextIsMKLDNN()) { + if (nextIsOnlyMKLDNN()) { return; } diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 32ae3b1bcf..0a355e2644 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -31,7 +31,6 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { if (m == nullptr) { size_t height = dims[0]; size_t width = cnts / dims[0]; - // LOG(INFO) << height << "," << width; m = Matrix::create(height, width, false, false); } @@ -40,10 +39,8 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match"; - size_t width = m->getWidth(); - size_t height = m->getHeight(); - real* data = m->getData(); - return std::make_shared(data, height, width, pd); + return std::make_shared( + m->getData(), m->getHeight(), m->getWidth(), pd); } MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, @@ -51,9 +48,7 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::format fmt, engine& eg, mkldnn::memory::data_type dtype) { - memory::desc md = memory::desc(dims, dtype, fmt); - memory::primitive_desc pd = memory::primitive_desc(md, eg); - return create(m, pd); + return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg)); } void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, @@ -64,9 +59,7 @@ void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, return; } CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - real* srcData = getData(); - real* dstData = m->getData(); - reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); + reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); } void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, @@ -77,9 +70,7 @@ void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, return; } CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - real* srcData = getData(); - real* dstData = m->getData(); - reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); + reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); } void MKLDNNMatrix::reorderOnce(void* srcData, @@ -120,8 +111,9 @@ void MKLDNNMatrix::downSpatial() { return; } - memory::dims srcDims = getDims(); + // TODO(TJ): change H(height) and W(width) if support nhwc or more const int H = 2, W = 3; + memory::dims srcDims = getDims(); if (srcDims[H] != 1 || srcDims[W] != 1) { // can not down spatial return; @@ -141,13 +133,12 @@ void MKLDNNMatrix::downSpatial() { } memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); - void* data = getData(); mkldnn_primitive_t result; mkldnn::error::wrap_c_api( mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), "could not create a memory primitive"); reset(result); - set_data_handle(data); + set_data_handle(getData()); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h 
b/paddle/math/MKLDNNMatrix.h index ea3fd7d461..e50f698b49 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -56,9 +56,9 @@ public: public: /** * Reorder this MKLDNNMatrix from other format. - * Support inplace reorder - * Pay attention: this function would only reorder the data layout. - * will NOT change this original dim or format info + * Support inplace reorder. + * @note: this function would only reorder the data layout. + * will NOT change this original dim or format info */ void reorderDataFrom(const MKLDNNMatrixPtr& m, memory::format srcFmt, @@ -66,9 +66,9 @@ public: /** * Reorder this MKLDNNMatrix to other format. - * Support inplace reorder - * Pay attention: this function would only reorder the data layout. - * will NOT change the dst dim or format info + * Support inplace reorder. + * @note: this function would only reorder the data layout. + * will NOT change the dst dim or format info */ void reorderDataTo(const MKLDNNMatrixPtr& m, memory::format dstFmt, @@ -90,18 +90,20 @@ public: /** * Get primitive descriptor. */ - mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } + mkldnn::memory::primitive_desc getPrimitiveDesc() { + return this->get_primitive_desc(); + } /** * Get memory descriptor. */ - mkldnn::memory::desc getMD() { return getPD().desc(); } + mkldnn::memory::desc getMemoryDesc() { return getPrimitiveDesc().desc(); } /** * Get dimensions. */ mkldnn::memory::dims getDims() { - mkldnn::memory::desc md = getMD(); + mkldnn::memory::desc md = getMemoryDesc(); const int* src = md.data.dims; int ndims = md.data.ndims; mkldnn::memory::dims dst; @@ -116,24 +118,25 @@ public: * Get format. */ mkldnn::memory::format getFormat() { - return (mkldnn::memory::format)(getMD().data.format); + return (mkldnn::memory::format)(getMemoryDesc().data.format); } /** * Get memory data type. */ mkldnn::memory::data_type getDtype() { - return (mkldnn::memory::data_type)(getMD().data.data_type); + return (mkldnn::memory::data_type)(getMemoryDesc().data.data_type); } /** * Get engine. */ - mkldnn::engine getEngine() { return getPD().get_engine(); } + mkldnn::engine getEngine() { return getPrimitiveDesc().get_engine(); } protected: /** - * Do once reorder supported inplace. + * Do reorder once. + * Can support inplace. */ void reorderOnce(void* srcData, void* dstData, From 34f4f763f9cf52d6c6326613ed839d00ac7c6eb0 Mon Sep 17 00:00:00 2001 From: chengduo Date: Wed, 30 Aug 2017 10:19:08 +0800 Subject: [PATCH 152/170] Update networks.py --- python/paddle/trainer_config_helpers/networks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 28a71cf788..34be203ee2 100644 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1406,7 +1406,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(* [l.name for l in layers]) + Inputs(*[l.name for l in layers]) def outputs(layers, *args): @@ -1456,7 +1456,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(* [l.name for l in layers]) + Outputs(*[l.name for l in layers]) return # just return outputs. if len(layers) != 1: From 168707caddf9c0ed67a2d87074a5f05b7a63a5c9 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 30 Aug 2017 11:35:19 +0800 Subject: [PATCH 153/170] Fix a small bug. 
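The Neon depthwise kernels only cover square 3x3 or 4x4 filters with equal strides of 1 or 2 (see the DepthwiseConvKernel<3, 1>, <3, 2>, <4, 1> and <4, 2> specializations later in this series), so ExpandConvLayer now falls back to the generic convolution path when those conditions do not hold. The added condition, rewritten here purely as an illustrative standalone predicate (the helper name is hypothetical; the patch below inlines the check):

    // Sketch of the guard added in ExpandConvLayer::init; not part of the patch.
    inline bool supportsNeonDepthwiseConv(int filterSizeX, int filterSizeY,
                                          int strideX, int strideY) {
      // Square 3x3 or 4x4 filter, equal strides of 1 or 2.
      return filterSizeX == filterSizeY &&
             (filterSizeX == 3 || filterSizeX == 4) &&
             strideX == strideY && (strideX == 1 || strideX == 2);
    }
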
--- paddle/gserver/layers/ExpandConvLayer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 0e84581769..20de475fc3 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -66,7 +66,11 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, // If depth wise convolution and useGpu == false and ARM-NEON if (!useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { #if defined(__ARM_NEON__) || defined(__ARM_NEON) - convType = "NeonDepthwiseConv"; + if ((filterSize_[i] == filterSizeY_[i]) && + (filterSize_[i] == 3 || filterSize_[i] == 4) && + (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) { + convType = "NeonDepthwiseConv"; + } #endif } From c5183caa04557628340983d17a64097f939db132 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 30 Aug 2017 13:37:51 +0800 Subject: [PATCH 154/170] rename --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 29 +++++++++++-------------- paddle/gserver/layers/MKLDNNLayer.h | 12 +++++----- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a08cca318e..8318c8c519 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -134,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() { const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (prevIsOnlyMKLDNN()) { + if (inputIsOnlyMKLDNN()) { const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; @@ -154,7 +154,7 @@ void MKLDNNFcLayer::resetFwd() { // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); - if (!nextIsOnlyMKLDNN()) { + if (!outputIsOnlyMKLDNN()) { convertOutputToOtherDevice(); } @@ -194,19 +194,16 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; // TODO(TJ): merge outgrad - if (nextIsOnlyMKLDNN()) { - // can not directly cast outputgrad to mkldnnmatrix, - // since each layer can not write the inputgrad to mkldnn inputgrad. - // So just create from matrix with outputvalue format. - const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); - } else { - const MatrixPtr& out = getOutput(CPU_DEVICE).grad; - // fc do not need to convert from cpu device since output always nc - // only need create from cpu device - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); - } - + int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; + // for MKLDNN device: + // can not directly cast outputgrad to mkldnnmatrix, + // since each layer can not write the inputgrad to mkldnn inputgrad. + // So just create from matrix with outputvalue format. + // for CPU device: + // fc do not need to convert from cpu device since output is always nc format + // only need create from cpu device + const MatrixPtr& out = getOutput(device).grad; + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) : nullptr; @@ -238,7 +235,7 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - int device = prevIsOnlyMKLDNN() ? 
MKLDNN_DEVICE : CPU_DEVICE; + device = inputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; const MatrixPtr& in = getInputGrad(0, device); if (in == nullptr) { return; diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 8fe9630e82..b983b833d5 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -151,6 +151,8 @@ public: protected: /** * copy image size and sequence info to other device + * @note: can not directly use Layer::copyOutputToOtherDevice since here only + * copy base info and do not copy data value */ void copyOutputInfoToOtherDevice() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { @@ -165,10 +167,10 @@ protected: } /** - * Is previous layer only has MKLDNN type. + * If input only has MKLDNN device. * Otherwise, only support the previous layer using CPU device. */ - bool prevIsOnlyMKLDNN(int index = 0) { + bool inputIsOnlyMKLDNN(int index = 0) { int prevDevice = getPrev(index)->getDeviceId(); if (prevDevice == MKLDNN_DEVICE) { return true; @@ -183,7 +185,7 @@ protected: * If output only has MKLDNN device. * Otherwise, other devices should only using CPU device. */ - bool nextIsOnlyMKLDNN() { + bool outputIsOnlyMKLDNN() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) << "Only support other device is CPU yet"; @@ -195,7 +197,7 @@ protected: * Sync input value data */ void syncInputValue() { - if (prevIsOnlyMKLDNN()) { + if (inputIsOnlyMKLDNN()) { return; } real* iData = getInputValue(0, CPU_DEVICE)->getData(); @@ -208,7 +210,7 @@ protected: * Sync output grad data */ void syncOutputGrad() { - if (nextIsOnlyMKLDNN()) { + if (outputIsOnlyMKLDNN()) { return; } From 31632a694c718ac31b890b1b46788f9d70d570c8 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Wed, 30 Aug 2017 14:48:03 +0800 Subject: [PATCH 155/170] remove unused ubuntu Debian install doc --- doc/getstarted/build_and_install/index_cn.rst | 4 +- doc/getstarted/build_and_install/index_en.rst | 3 +- .../build_and_install/ubuntu_install_cn.rst | 71 ------------------- .../build_and_install/ubuntu_install_en.rst | 25 ------- 4 files changed, 2 insertions(+), 101 deletions(-) delete mode 100644 doc/getstarted/build_and_install/ubuntu_install_cn.rst delete mode 100644 doc/getstarted/build_and_install/ubuntu_install_en.rst diff --git a/doc/getstarted/build_and_install/index_cn.rst b/doc/getstarted/build_and_install/index_cn.rst index a24df6c518..dd9923697a 100644 --- a/doc/getstarted/build_and_install/index_cn.rst +++ b/doc/getstarted/build_and_install/index_cn.rst @@ -6,14 +6,12 @@ 安装流程 ++++++++ -PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜像,ubuntu的deb安装包等。我们推荐使用Docker镜像来部署环境,同时欢迎贡献更多的安装包。 +PaddlePaddle提供Docker镜像来部署环境。 .. toctree:: :maxdepth: 1 docker_install_cn.rst - ubuntu_install_cn.rst - 编译流程 diff --git a/doc/getstarted/build_and_install/index_en.rst b/doc/getstarted/build_and_install/index_en.rst index 1bfd4f75c0..8a53588e04 100644 --- a/doc/getstarted/build_and_install/index_en.rst +++ b/doc/getstarted/build_and_install/index_en.rst @@ -8,14 +8,13 @@ Install PaddlePaddle :maxdepth: 1 docker_install_en.rst - ubuntu_install_en.rst Build from Source ----------------- .. warning:: - Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code. + Please use :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code. .. 
toctree:: :maxdepth: 1 diff --git a/doc/getstarted/build_and_install/ubuntu_install_cn.rst b/doc/getstarted/build_and_install/ubuntu_install_cn.rst deleted file mode 100644 index 9e39ccb00f..0000000000 --- a/doc/getstarted/build_and_install/ubuntu_install_cn.rst +++ /dev/null @@ -1,71 +0,0 @@ -Ubuntu部署PaddlePaddle -=================================== - -PaddlePaddle提供了ubuntu 14.04 deb安装包。 - -安装 ------- - -安装包的下载地址是\: https://github.com/PaddlePaddle/Paddle/releases - -它包含四个版本\: - -* cpu版本: 支持主流x86处理器平台, 使用了avx指令集。 - -* cpu-noavx版本:支持主流x86处理器平台,没有使用avx指令集。 - -* gpu版本:支持主流x86处理器平台,支持nvidia cuda平台,使用了avx指令集。 - -* gpu-noavx版本:支持主流x86处理器平台,支持nvidia cuda平台,没有使用avx指令集。 - -下载完相关安装包后,执行: - -.. code-block:: shell - - sudo apt-get install gdebi - gdebi paddle-*-cpu.deb - -或者: - -.. code-block:: shell - - dpkg -i paddle-*-cpu.deb - apt-get install -f - - -在 :code:`dpkg -i` 的时候如果报一些依赖未找到的错误是正常的, -在 :code:`apt-get install -f` 里会继续安装 PaddlePaddle。 - -安装完成后,可以使用命令 :code:`paddle version` 查看安装后的paddle 版本: - -.. code-block:: shell - - PaddlePaddle 0.8.0b1, compiled with - with_avx: ON - with_gpu: OFF - with_double: OFF - with_python: ON - with_rdma: OFF - with_timer: OFF - with_predict_sdk: - - -可能遇到的问题 --------------- - -libcudart.so/libcudnn.so找不到 -++++++++++++++++++++++++++++++ - -安装完成后,运行 :code:`paddle train` 报错\: - -.. code-block:: shell - - 0831 12:36:04.151525 1085 hl_dso_loader.cc:70] Check failed: nullptr != *dso_handle For Gpu version of PaddlePaddle, it couldn't find CUDA library: libcudart.so Please make sure you already specify its path.Note: for training data on Cpu using Gpu version of PaddlePaddle,you must specify libcudart.so via LD_LIBRARY_PATH. - -原因是未设置cuda运行时环境变量。 如果使用GPU版本的PaddlePaddle,请安装CUDA 7.5 和CUDNN 5到本地环境中,并设置: - -.. code-block:: shell - - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH - export PATH=/usr/local/cuda/bin:$PATH - diff --git a/doc/getstarted/build_and_install/ubuntu_install_en.rst b/doc/getstarted/build_and_install/ubuntu_install_en.rst deleted file mode 100644 index ea8042085b..0000000000 --- a/doc/getstarted/build_and_install/ubuntu_install_en.rst +++ /dev/null @@ -1,25 +0,0 @@ -Debian Package installation guide -================================= - -PaddlePaddle supports :code:`deb` pacakge. The installation of this :code:`deb` package is tested in ubuntu 14.04, but it should be support other debian based linux, too. - -There are four versions of debian package, :code:`cpu`, :code:`gpu`, :code:`cpu-noavx`, :code:`gpu-noavx`. And :code:`noavx` version is used to support CPU which does not contain :code:`AVX` instructions. The download url of :code:`deb` package is \: https://github.com/baidu/Paddle/releases/ - - -After downloading PaddlePaddle deb packages, you can use :code:`gdebi` install. - -.. code-block:: bash - - gdebi paddle-*.deb - -If :code:`gdebi` is not installed, you can use :code:`sudo apt-get install gdebi` to install it. - -Or you can use following commands to install PaddlePaddle. - -.. code-block:: bash - - dpkg -i paddle-*.deb - apt-get install -f - -And if you use GPU version deb package, you need to install CUDA toolkit and cuDNN, and set related environment variables(such as LD_LIBRARY_PATH) first. It is normal when `dpkg -i` get errors. `apt-get install -f` will continue install paddle, and install dependences. 
- From 64791188952437852ad549914a70baea3320f827 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Wed, 30 Aug 2017 17:36:22 +0800 Subject: [PATCH 156/170] fix download mklml error --- cmake/external/mklml.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 51fafb9479..77ea244900 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -54,7 +54,8 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${MKLML_SOURCE_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate -qO- ${MKLML_URL} | tar xz -C ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -O ${MKLML_VER}.tgz + && tar zxf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} From 2563e32bb12b363c41d608bf0f6f1060ea769f8b Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 30 Aug 2017 17:57:26 +0800 Subject: [PATCH 157/170] fix clang build error --- paddle/gserver/layers/CostLayer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 0ce72ef40a..0f655b48ee 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -318,7 +318,9 @@ public: void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override {} }; /** From f557b0c4c5e1ac97fdc092ed85993c4dda72fd2d Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 00:11:44 +0800 Subject: [PATCH 158/170] fix data_layer for 3D data --- python/paddle/trainer_config_helpers/layers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ebb6f36504..c92764e1f9 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -929,11 +929,13 @@ def data_layer(name, size, depth=None, height=None, width=None, width=width, **ExtraLayerAttribute.to_kwargs(layer_attr)) + if depth is None: + depth = 1 num_filters = None if height is not None and width is not None: - num_filters = size / (width * height) - assert num_filters * width * height == size, \ - "size=%s width=%s height=%s" % (size, width, height) + num_filters = size / (width * height * depth) + assert num_filters * width * height * depth == size, \ + "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) From 2ae37a4ea2f4b02ffe6b773590ed05c77675e6f5 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 00:28:01 +0800 Subject: [PATCH 159/170] fix data_layer for 3D data --- python/paddle/trainer_config_helpers/layers.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 42bf1c19d1..2aa86850d1 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -926,16 +926,18 @@ def data_layer(name, size, height=None, width=None, depth=None, type=LayerType.DATA, name=name, size=size, + depth=depth, height=height, width=width, - depth=depth, 
**ExtraLayerAttribute.to_kwargs(layer_attr)) + if depth is None: + depth = 1 num_filters = None if height is not None and width is not None: - num_filters = size / (width * height) - assert num_filters * width * height == size, \ - "size=%s width=%s height=%s" % (size, width, height) + num_filters = size / (width * height * depth) + assert num_filters * width * height*depth == size, \ + "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) From 09e903eb9417745952ced6db532594fd4a759d74 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 29 Aug 2017 13:44:51 +0800 Subject: [PATCH 160/170] fix v2 infer interface. --- paddle/gserver/layers/CrossEntropyOverBeam.cpp | 1 - python/paddle/v2/inference.py | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 500cd6ff8c..bffcc30154 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -39,7 +39,6 @@ void CostForOneSequence::calValidExpandStep() { if (start + beamSize_ == findEnd) return; goldColIds_[i] = findEnd - start; } - if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false; } diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 4dcc3ab57e..8acea6155c 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -70,7 +70,7 @@ class Inference(object): item = [each_result[each_field] for each_field in field] yield item - def infer(self, input, field='value', **kwargs): + def infer(self, input, field='value', flatten_result=True, **kwargs): """ Infer a data by model. :param input: input data batch. Should be python iterable object. 
@@ -83,7 +83,10 @@ class Inference(object): retv = [[] for i in xrange(len(result))] for i, item in enumerate(result): retv[i].append(item) - retv = [numpy.concatenate(out) for out in retv] + + if flatten_result: + retv = [numpy.concatenate(out) for out in retv] + if len(retv) == 1: return retv[0] else: From 2e8d47dd09001da94015fb4a96f21452631fcbad Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 31 Aug 2017 11:01:03 +0800 Subject: [PATCH 161/170] simplify and make quiet in the download of mklml.cmake --- cmake/external/mklml.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 77ea244900..74f3279831 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -54,8 +54,8 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${MKLML_SOURCE_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -O ${MKLML_VER}.tgz - && tar zxf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz + DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz + && tar zxf ${MKLML_VER}.tgz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} From 2e97045c2354ea8a6ae39ee17e93098a2ec930d4 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 14:10:40 +0800 Subject: [PATCH 162/170] fix layers_test.py --- .../tests/configs/file_list.sh | 2 +- ...3d_test_config.py => test_conv3d_layer.py} | 44 +--------------- .../tests/configs/test_deconv3d_layer.py | 50 +++++++++++++++++++ .../tests/layers_test.py | 3 +- 4 files changed, 53 insertions(+), 46 deletions(-) rename python/paddle/trainer_config_helpers/tests/configs/{conv3d_deconv3d_test_config.py => test_conv3d_layer.py} (51%) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 1ca5c8a07e..729e8e67c2 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer -test_seq_slice_layer) +test_seq_slice_layer test_conv3d_layer test_deconv3d_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py similarity index 51% rename from python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py rename to python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py index 15f7c1d271..aa0a2c0d5f 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py @@ -14,18 +14,6 @@ padding_y = 1 padding_z = 1 groups = 1 -data1 = data_layer(name='data1', size=2016 * num_channels, height=48, width=42) - -img_conv_layer( - input=data1, - filter_size=filter_size, - num_channels=num_channels, - num_filters=16, - stride=stride, - padding=padding, - act=LinearActivation(), - 
bias_attr=False) - data = data_layer( name='data', size=12096 * num_channels, height=48, width=42, depth=6) # first @@ -58,34 +46,4 @@ conv3d_2 = img_conv3d_layer( trans=False, layer_type="conv3d", act=LinearActivation()) - -# first -deconv3d_1 = img_conv3d_layer( - input=data, - name='deconv3d_1', - num_filters=16, - num_channels=num_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="deconv3d", - act=LinearActivation()) -# second -deconv3d_2 = img_conv3d_layer( - input=data, - name='deconv3d_2', - num_filters=16, - num_channels=num_channels, - filter_size=[filter_size, filter_size_y, filter_size_z], - stride=[stride, stride_y, stride_z], - padding=[padding, padding_y, padding_z], - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="deconv3d", - act=LinearActivation()) +outputs(conv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py new file mode 100644 index 0000000000..a113279fc1 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py @@ -0,0 +1,50 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +num_channels = 3 +filter_size = 3 +filter_size_y = 3 +filter_size_z = 3 +stride = 2 +stride_y = 2 +stride_z = 2 +padding = 1 +padding_y = 1 +padding_z = 1 +groups = 1 + +data = data_layer( + name='data', size=12096 * num_channels, height=48, width=42, depth=6) + +# first +deconv3d_1 = img_conv3d_layer( + input=data, + name='deconv3d_1', + num_filters=16, + num_channels=num_channels, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + bias_attr=True, + shared_biases=True, + trans=True, + layer_type="deconv3d", + act=LinearActivation()) +# second +deconv3d_2 = img_conv3d_layer( + input=data, + name='deconv3d_2', + num_filters=16, + num_channels=num_channels, + filter_size=[filter_size, filter_size_y, filter_size_z], + stride=[stride, stride_y, stride_z], + padding=[padding, padding_y, padding_z], + groups=groups, + bias_attr=True, + shared_biases=True, + trans=True, + layer_type="deconv3d", + act=LinearActivation()) +outputs(deconv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 44d1c1c9b2..b3dd8f8fc7 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,6 +16,5 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/configs/conv3d_deconv3d_test_config.py', - '') + 'trainer_config_helpers/tests/layers_test_config.py', '') # layers_test_config.py From 36f0aa7390e3044b8e26d1787f99ed5edaf27ed0 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 31 Aug 2017 13:06:22 +0800 Subject: [PATCH 163/170] fix code style to pass CI. 
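Besides adding braces to single-statement if/else branches, the change marks the BeamExpansion(int) constructor explicit, so an int no longer converts implicitly to BeamExpansion and splitBatchBeams has to construct the fill value itself. A short illustrative sketch of that effect (the demo function and the stripped-down struct are illustrative only, not part of the patch):

    #include <cstddef>
    #include <vector>

    struct BeamExpansion {
      explicit BeamExpansion(int n) : expansionCount(n) {}
      size_t expansionCount;
    };

    void resizeBeams(std::vector<BeamExpansion>& beams,
                     size_t batchSize,
                     int beamExpanCount) {
      // beams.resize(batchSize, beamExpanCount);              // rejected: explicit ctor
      beams.resize(batchSize, BeamExpansion(beamExpanCount));  // ok
    }
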
--- paddle/gserver/layers/CrossEntropyOverBeam.cpp | 11 +++++++---- paddle/gserver/layers/CrossEntropyOverBeam.h | 6 +++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index bffcc30154..4acc077035 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -28,8 +28,9 @@ void CostForOneSequence::calValidExpandStep() { start, start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1], [](const real& val) { return val != -1.; }); - } else + } else { goldRowIds_[i] = 0; + } real* start = beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_; @@ -288,7 +289,7 @@ void CrossEntropyOverBeam::copyInputsToCpu() { void CrossEntropyOverBeam::splitBatchBeams() { beamCosts_.resize(batchSize_); - beamPerSeq_.resize(batchSize_, beamExpanCount_); + beamPerSeq_.resize(batchSize_, BeamExpansion(beamExpanCount_)); for (size_t i = 0; i < beamExpanCount_; ++i) { int* seqStarts = @@ -300,8 +301,9 @@ void CrossEntropyOverBeam::splitBatchBeams() { subSeqStarts = getInput(i * 3).subSequenceStartPositions->getMutableData(false); maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1; - } else + } else { maxLen = getInput(i).sequenceStartPositions->getSize() - 1; + } for (size_t j = 0; j < batchSize_; ++j) { beamPerSeq_[j].scores[i] = @@ -348,8 +350,9 @@ void CrossEntropyOverBeam::resizeOutput() { inGrad->getWidth(), false, false); - } else + } else { candidateScoreGrad_[i] = std::move(inGrad); + } candidateScoreGrad_[i]->zeroMem(); } } diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h index 5d0cffee3c..5643556f43 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.h +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -31,7 +31,7 @@ struct BeamExpansion { size_t expansionCount; - BeamExpansion(int n) { + explicit BeamExpansion(int n) { expansionCount = n; scores.resize(expansionCount); seqInfo.resize(expansionCount); @@ -39,7 +39,7 @@ struct BeamExpansion { scoreGrad.resize(expansionCount); gold.resize(expansionCount); - }; + } }; typedef std::shared_ptr BeamExpansionPtr; @@ -74,7 +74,7 @@ private: CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId); int* starts = beams_->seqInfo[beamId]->getData(); return starts[rowId] - starts[0]; - }; + } size_t beamSize_; size_t validExpansionCount_; From d747c5d5119b7e564b9b7dcc7d7528ac91972712 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Thu, 31 Aug 2017 13:57:59 +0800 Subject: [PATCH 164/170] fix layers_test.py --- paddle/cuda/src/hl_cuda_cnn.cu | 5 +++-- paddle/parameter/Argument.h | 3 +++ .../paddle/trainer_config_helpers/tests/configs/file_list.sh | 2 +- python/paddle/trainer_config_helpers/tests/layers_test.py | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 95440c9446..9ba3d14261 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -1,8 +1,11 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -350,7 +353,6 @@ void hl_avgpool_backward(const int frameCnt, CHECK_SYNC("hl_avgpool_backward failed"); } -///////////////// __global__ void KeMaxPool3DForward(const int nthreads, const real* inputData, const int channels, @@ -777,7 +779,6 @@ void hl_avgpool3D_backward(const int frameCnt, outStride); CHECK_SYNC("hl_avgpool3D_backward failed"); } -///////////////// __global__ void KeBilinearInterpFw(const real* in, const size_t inImgH, diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 7b59199dde..9ed63462b1 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -1,8 +1,11 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 1ca5c8a07e..e7dc08c6dd 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer -test_seq_slice_layer) +test_seq_slice_layer test_pooling3D_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py index 52218972bf..05902ea293 100644 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -16,4 +16,4 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': parse_config_and_serialize( - 'trainer_config_helpers/tests/configs/test_pooling3D_layer.py', '') + 'trainer_config_helpers/tests/layers_test_config.py', '') From 3e1f56fa2bfdaddc42dc716c099ffdce229a7068 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 31 Aug 2017 15:44:24 +0800 Subject: [PATCH 165/170] don't need to specify the path of libwarpctc.so, refine the python api doc --- python/paddle/trainer_config_helpers/layers.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index e73098910c..b2ba16333b 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -5065,17 +5065,6 @@ def warp_ctc_layer(input, building process, PaddlePaddle will clone the source codes, build and install it to :code:`third_party/install/warpctc` directory. 
- To use warp_ctc layer, you need to specify the path of :code:`libwarpctc.so`, - using following methods: - - 1. Set it in :code:`paddle.init` (python api) or :code:`paddle_init` (c api), - such as :code:`paddle.init(use_gpu=True, - warpctc_dir=your_paddle_source_dir/third_party/install/warpctc/lib)`. - - 2. Set environment variable LD_LIBRARY_PATH on Linux or DYLD_LIBRARY_PATH - on Mac OS. For instance, :code:`export - LD_LIBRARY_PATH=your_paddle_source_dir/third_party/install/warpctc/lib:$LD_LIBRARY_PATH`. - More details of CTC can be found by referring to `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks Date: Thu, 31 Aug 2017 16:33:01 +0800 Subject: [PATCH 166/170] Add test_conv3d_layer.protostr,test_deconv3d_layer.protostr --- .../protostr/test_conv3d_layer.protostr | 132 ++++++++++++++++++ .../protostr/test_deconv3d_layer.protostr | 132 ++++++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr new file mode 100644 index 0000000000..9fe2bc29d3 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr @@ -0,0 +1,132 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 36288 + active_type: "" + height: 48 + width: 42 + depth: 6 +} +layers { + name: "conv3d_1" + type: "conv3d" + size: 24192 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_conv3d_1.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 21 + img_size: 42 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 24 + img_size_y: 48 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 3 + img_size_z: 6 + } + } + bias_parameter_name: "_conv3d_1.wbias" + num_filters: 16 + shared_biases: true + height: 24 + width: 21 + depth: 3 +} +layers { + name: "conv3d_2" + type: "conv3d" + size: 24192 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_conv3d_2.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 21 + img_size: 42 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 24 + img_size_y: 48 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 3 + img_size_z: 6 + } + } + bias_parameter_name: "_conv3d_2.wbias" + num_filters: 16 + shared_biases: true + height: 24 + width: 21 + depth: 3 +} +parameters { + name: "_conv3d_1.w0" + size: 1296 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_conv3d_1.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_conv3d_2.w0" + size: 1296 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_conv3d_2.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "conv3d_2" +sub_models { + name: 
"root" + layer_names: "data" + layer_names: "conv3d_1" + layer_names: "conv3d_2" + input_layer_names: "data" + output_layer_names: "conv3d_2" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr new file mode 100644 index 0000000000..7bf409731c --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr @@ -0,0 +1,132 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 36288 + active_type: "" + height: 48 + width: 42 + depth: 6 +} +layers { + name: "deconv3d_1" + type: "deconv3d" + size: 1387760 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_deconv3d_1.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 16 + output_x: 42 + img_size: 83 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 48 + img_size_y: 95 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 6 + img_size_z: 11 + } + } + bias_parameter_name: "_deconv3d_1.wbias" + num_filters: 16 + shared_biases: true + height: 95 + width: 83 + depth: 11 +} +layers { + name: "deconv3d_2" + type: "deconv3d" + size: 1387760 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "_deconv3d_2.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 16 + output_x: 42 + img_size: 83 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 48 + img_size_y: 95 + filter_size_z: 3 + padding_z: 1 + stride_z: 2 + output_z: 6 + img_size_z: 11 + } + } + bias_parameter_name: "_deconv3d_2.wbias" + num_filters: 16 + shared_biases: true + height: 95 + width: 83 + depth: 11 +} +parameters { + name: "_deconv3d_1.w0" + size: 6912 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_deconv3d_1.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_deconv3d_2.w0" + size: 6912 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_deconv3d_2.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "deconv3d_2" +sub_models { + name: "root" + layer_names: "data" + layer_names: "deconv3d_1" + layer_names: "deconv3d_2" + input_layer_names: "data" + output_layer_names: "deconv3d_2" + is_recurrent_layer_group: false +} + From d394a1447125af9f6fc2b43c936d36a61662dc0e Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 31 Aug 2017 17:05:39 +0800 Subject: [PATCH 167/170] Remove unused incluing file net/if_arp.h. --- paddle/pserver/LightNetwork.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/pserver/LightNetwork.cpp index 8616fd2d5a..4203f26164 100644 --- a/paddle/pserver/LightNetwork.cpp +++ b/paddle/pserver/LightNetwork.cpp @@ -22,7 +22,6 @@ limitations under the License. 
*/ #include #include -#include #include #include From 3bafa42b1afe10aa6ab712d1d258bb079ac814ea Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 31 Aug 2017 18:08:44 +0800 Subject: [PATCH 168/170] fix tensor copyfrom bug --- paddle/framework/tensor_impl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 7d7263b899..7893e233b7 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -117,6 +117,8 @@ inline void Tensor::CopyFrom(const Tensor& src, memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size, 0); } + PADDLE_ENFORCE(cudaStreamSynchronize(0), + "cudaStreamSynchronize failed in Tensor CopyFrom"); #endif } From c54c7d91a0c098bf22ba399aee15ebb421de1bfb Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 1 Sep 2017 16:01:53 +0800 Subject: [PATCH 169/170] Use template to deliver const argument instead, to remove the compiling error "argument to __builtin_neon_vgetq_lane_f32 must be a constant integer". --- paddle/function/neon/NeonDepthwiseConv.cpp | 100 ++++++++++----------- paddle/function/neon/neon_util.h | 4 +- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index f09e98587d..14e5198e1b 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -116,15 +116,15 @@ struct DepthwiseConvKernel<3, 1> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]); + tmp1 = vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]); + tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -223,15 +223,15 @@ struct DepthwiseConvKernel<3, 2> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]); + tmp1 = vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]); + tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]); + tmp1 = 
vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -316,22 +316,22 @@ struct DepthwiseConvKernel<4, 1> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], k[1]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -431,22 +431,22 @@ struct DepthwiseConvKernel<4, 2> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], k[1]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], 
k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); diff --git a/paddle/function/neon/neon_util.h b/paddle/function/neon/neon_util.h index 56b3febe2d..dbe017170b 100644 --- a/paddle/function/neon/neon_util.h +++ b/paddle/function/neon/neon_util.h @@ -33,10 +33,10 @@ inline float32_t vaddvq_f32(float32x4_t a) { return vget_lane_f32(vpadd_f32(v, v), 0); } +template inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, - float32x4_t v, - const int lane) { + float32x4_t v) { return vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane)); } #endif From 8b15ac82fa831f95493c2bd218b93655db0d739e Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 1 Sep 2017 17:50:01 +0800 Subject: [PATCH 170/170] Move the definition of hl_cpu_gru_forward and hl_cpu_gru_backward to function/GruFunctor.h. --- paddle/cuda/include/hl_cpu_gru.cuh | 134 --------------------- paddle/function/GruFunctor.h | 160 +++++++++++++++++++++++++ paddle/gserver/layers/GruCompute.cpp | 32 ++--- paddle/scripts/docker/build_android.sh | 25 +--- 4 files changed, 181 insertions(+), 170 deletions(-) create mode 100644 paddle/function/GruFunctor.h diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh index 732799a28b..347b038598 100644 --- a/paddle/cuda/include/hl_cpu_gru.cuh +++ b/paddle/cuda/include/hl_cpu_gru.cuh @@ -18,14 +18,6 @@ limitations under the License. 
*/ #ifndef __NVCC__ -#include "paddle/math/MathFunctions.h" - -// #ifndef PADDLE_TYPE_DOUBLE -// #define CBLAS_GEMM paddle::gemm -// #else -// #define CBLAS_GEMM paddle::gemm -// #endif - template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, real *gateValue, @@ -210,51 +202,6 @@ inline void forward_final_output(OpFinalOutput opFinalOutput, } } -template -void hl_cpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// 2 * frameSize, -// frameSize, -// 1, -// value.prevOutValue, -// frameSize, -// value.gateWeight, -// frameSize * 2, -// 1, -// value.gateValue, -// frameSize * 3); - } - - forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); - - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// value.resetOutputValue, -// frameSize, -// value.stateWeight, -// frameSize, -// 1, -// value.gateValue + frameSize * 2, -// frameSize * 3); - } - - forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); -} - template void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, real *gateValue, @@ -524,87 +471,6 @@ inline void backward_reset_grad(OpResetGrad opResetGrad, } } } - -template -void hl_cpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - backward_state_grad(opStateGrad, value, grad, - frameSize, batchSize, active_node); - - if (value.prevOutValue && grad.prevOutGrad) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// value.stateWeight, -// frameSize, -// 0, -// grad.resetOutputGrad, -// frameSize); - - if (grad.stateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize, -// batchSize, -// 1, -// value.resetOutputValue, -// frameSize, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// 1, -// grad.stateWeightGrad, -// frameSize); - } - } - - backward_reset_grad(opResetGrad, value, grad, - frameSize, batchSize, active_gate); - - if (grad.prevOutGrad && value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize * 2, -// 1, -// grad.gateGrad, -// frameSize * 3, -// value.gateWeight, -// frameSize * 2, -// 1, -// grad.prevOutGrad, -// frameSize); - - if (grad.gateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize * 2, -// batchSize, -// 1, -// value.prevOutValue, -// frameSize, -// grad.gateGrad, -// frameSize * 3, -// 1, -// grad.gateWeightGrad, -// frameSize * 2); - } - } -} - #endif #endif // HL_CPU_GRU_CUH_ diff --git a/paddle/function/GruFunctor.h b/paddle/function/GruFunctor.h new file mode 100644 index 0000000000..11f6174dbd --- /dev/null +++ b/paddle/function/GruFunctor.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "GemmFunctor.h"
+#include "GruFunctor.h"
+#include "hl_cpu_gru.cuh"
+
+namespace paddle {
+
+template <DeviceType Device, class T>
+struct GruFunctor {
+  template <class OpResetOutput, class OpFinalOutput>
+  static void compute(OpResetOutput opResetOutput,
+                      OpFinalOutput opFinalOutput,
+                      hl_gru_value value,
+                      int frameSize,
+                      int batchSize,
+                      hl_activation_mode_t active_node,
+                      hl_activation_mode_t active_gate) {
+#ifndef __NVCC__
+    if (value.prevOutValue) {
+      BlasGemm<Device, T>::compute(false,
+                                   false,
+                                   batchSize,
+                                   2 * frameSize,
+                                   frameSize,
+                                   1,
+                                   value.prevOutValue,
+                                   frameSize,
+                                   value.gateWeight,
+                                   frameSize * 2,
+                                   1,
+                                   value.gateValue,
+                                   frameSize * 3);
+    }
+
+    forward_reset_output(
+        opResetOutput, value, frameSize, batchSize, active_gate);
+
+    if (value.prevOutValue) {
+      BlasGemm<Device, T>::compute(false,
+                                   false,
+                                   batchSize,
+                                   frameSize,
+                                   frameSize,
+                                   1,
+                                   value.resetOutputValue,
+                                   frameSize,
+                                   value.stateWeight,
+                                   frameSize,
+                                   1,
+                                   value.gateValue + frameSize * 2,
+                                   frameSize * 3);
+    }
+
+    forward_final_output(
+        opFinalOutput, value, frameSize, batchSize, active_node);
+#endif
+  }
+};
+
+template <DeviceType Device, class T>
+struct GruGradFunctor {
+  template <class OpStateGrad, class OpResetGrad>
+  static void compute(OpStateGrad opStateGrad,
+                      OpResetGrad opResetGrad,
+                      hl_gru_value value,
+                      hl_gru_grad grad,
+                      int frameSize,
+                      int batchSize,
+                      hl_activation_mode_t active_node,
+                      hl_activation_mode_t active_gate) {
+#ifndef __NVCC__
+    backward_state_grad(
+        opStateGrad, value, grad, frameSize, batchSize, active_node);
+
+    if (value.prevOutValue && grad.prevOutGrad) {
+      BlasGemm<Device, T>::compute(false,
+                                   true,
+                                   batchSize,
+                                   frameSize,
+                                   frameSize,
+                                   1,
+                                   grad.gateGrad + frameSize * 2,
+                                   frameSize * 3,
+                                   value.stateWeight,
+                                   frameSize,
+                                   0,
+                                   grad.resetOutputGrad,
+                                   frameSize);
+
+      if (grad.stateWeightGrad) {
+        BlasGemm<Device, T>::compute(true,
+                                     false,
+                                     frameSize,
+                                     frameSize,
+                                     batchSize,
+                                     1,
+                                     value.resetOutputValue,
+                                     frameSize,
+                                     grad.gateGrad + frameSize * 2,
+                                     frameSize * 3,
+                                     1,
+                                     grad.stateWeightGrad,
+                                     frameSize);
+      }
+    }
+
+    backward_reset_grad(
+        opResetGrad, value, grad, frameSize, batchSize, active_gate);
+
+    if (grad.prevOutGrad && value.prevOutValue) {
+      BlasGemm<Device, T>::compute(false,
+                                   true,
+                                   batchSize,
+                                   frameSize,
+                                   frameSize * 2,
+                                   1,
+                                   grad.gateGrad,
+                                   frameSize * 3,
+                                   value.gateWeight,
+                                   frameSize * 2,
+                                   1,
+                                   grad.prevOutGrad,
+                                   frameSize);
+
+      if (grad.gateWeightGrad) {
+        BlasGemm<Device, T>::compute(true,
+                                     false,
+                                     frameSize,
+                                     frameSize * 2,
+                                     batchSize,
+                                     1,
+                                     value.prevOutValue,
+                                     frameSize,
+                                     grad.gateGrad,
+                                     frameSize * 3,
+                                     1,
+                                     grad.gateWeightGrad,
+                                     frameSize * 2);
+      }
+    }
+#endif
+  }
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/GruCompute.cpp b/paddle/gserver/layers/GruCompute.cpp
index 06907768e9..148516391c 100644
--- a/paddle/gserver/layers/GruCompute.cpp
+++ b/paddle/gserver/layers/GruCompute.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "GruCompute.h"
 #include "hl_recurrent_apply.cuh"
+#include "paddle/function/GruFunctor.h"
 #include "paddle/utils/Util.h"
 
 namespace paddle {
@@ -25,13 +26,13 @@ void GruCompute::init(LayerConfig &config) {
 
 template <>
 void GruCompute::forward<0>(hl_gru_value value, int frameSize, int batchSize) {
-  hl_cpu_gru_forward(hppl::forward::gru_resetOutput(),
-                     hppl::forward::gru_finalOutput(),
-                     value,
-                     frameSize,
-                     batchSize,
-                     activeNode_,
-                     activeGate_);
+  GruFunctor<DEVICE_TYPE_CPU, real>::compute(hppl::forward::gru_resetOutput(),
+                                             hppl::forward::gru_finalOutput(),
+                                             value,
+                                             frameSize,
+                                             batchSize,
+                                             activeNode_,
+                                             activeGate_);
 }
 
 template <>
@@ -39,14 +40,15 @@ void GruCompute::backward<0>(hl_gru_value value,
                              hl_gru_grad grad,
                              int frameSize,
                              int batchSize) {
-  hl_cpu_gru_backward(hppl::backward::gru_stateGrad(),
-                      hppl::backward::gru_resetGrad(),
-                      value,
-                      grad,
-                      frameSize,
-                      batchSize,
-                      activeNode_,
-                      activeGate_);
+  GruGradFunctor<DEVICE_TYPE_CPU, real>::compute(
+      hppl::backward::gru_stateGrad(),
+      hppl::backward::gru_resetGrad(),
+      value,
+      grad,
+      frameSize,
+      batchSize,
+      activeNode_,
+      activeGate_);
 }
 
 }  // namespace paddle
diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh
index a61c7c40e9..34e31f1394 100644
--- a/paddle/scripts/docker/build_android.sh
+++ b/paddle/scripts/docker/build_android.sh
@@ -2,25 +2,8 @@
 
 set -xe
 
-COMPILER=gcc
-USE_EIGEN=ON
-if [ $COMPILER == clang ]; then
-  SUFFIX=_clang
-  C_COMPILER=clang
-  CXX_COMPILER=clang++
-else
-  SUFFIX=_gcc
-  C_COMPILER=gcc
-  CXX_COMPILER=g++
-fi
-if [ $USE_EIGEN == ON ]; then
-  SUFFIX=${SUFFIX}_eigen
-else
-  SUFFIX=${SUFFIX}_openblas
-fi
-
-BUILD_ROOT=/paddle/build_android$SUFFIX
-DEST_ROOT=/paddle/install$SUFFIX
+BUILD_ROOT=/paddle/build_android
+DEST_ROOT=/paddle/install
 
 rm -rf $BUILD_ROOT 2>/dev/null || true
 mkdir -p $BUILD_ROOT
@@ -41,7 +24,7 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then
           -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
           -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
           -DCMAKE_BUILD_TYPE=Release \
-          -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \
+          -DUSE_EIGEN_FOR_BLAS=ON \
          -DWITH_C_API=ON \
          -DWITH_SWIG_PY=OFF \
          -DWITH_STYLE_CHECK=OFF \
@@ -58,7 +41,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then
          -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
          -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
          -DCMAKE_BUILD_TYPE=Release \
-          -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \
+          -DUSE_EIGEN_FOR_BLAS=OFF \
          -DWITH_C_API=ON \
          -DWITH_SWIG_PY=OFF \
          ..