Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into refine-context
commit 6b2323cd6d
@@ -0,0 +1,178 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/backward.h"
|
||||
#include <list>
|
||||
#include "paddle/framework/net.h"
|
||||
#include "paddle/framework/op_registry.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
|
||||
static bool AllInSet(const std::vector<std::string>& names,
|
||||
const std::string& suffix,
|
||||
const std::unordered_set<std::string>& set) {
|
||||
for (auto& name : names) {
|
||||
if (set.find(name + suffix) == set.end()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static std::shared_ptr<OperatorBase> NOP() {
|
||||
auto net_op = std::make_shared<NetOp>();
|
||||
net_op->type_ = "@NOP@";
|
||||
net_op->CompleteAddOp();
|
||||
return net_op;
|
||||
}
|
||||
|
||||
// Get the backward operator from a forward operator, implemented recursively.
//
// no_grad_names: the gradient variable names whose gradients need not be
// calculated.
//
// uniq_id is a unique index used inside the recursive calls to
// BackwardRecursive. Use `uid = uniq_id++;` to obtain the unique index, and
// pass `uniq_id` through the recursive calls.
//
// Returns the backward operator. For a simple situation it is a plain
// operator; for a complex situation it is a NetOp.
//
// See Backward.h for details.
static std::shared_ptr<OperatorBase> BackwardRecursive(
    const OperatorBase& forwardOp,
    std::unordered_set<std::string>& no_grad_names, size_t& uniq_id);
std::shared_ptr<OperatorBase> BackwardRecursive(
    const OperatorBase& forwardOp,
    std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) {
  // If none of the input gradients of the forward operator need to be
  // calculated, just return a NOP. We do not return a null pointer because a
  // NOP costs little to compute and it keeps the calling logic simple.
  if (AllInSet(forwardOp.inputs_, OperatorBase::GRAD_VAR_SUFFIX(),
               no_grad_names)) {
    return NOP();
  }

  // If none of the output gradients of the forward operator need to be
  // calculated, then none of its input gradients can be computed at all; put
  // them into the `no_grad_names` set and return a NOP.
  if (AllInSet(forwardOp.outputs_, OperatorBase::GRAD_VAR_SUFFIX(),
               no_grad_names)) {
    for (auto& name : forwardOp.inputs_) {
      // Mark every input gradient as not needed.
      no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
    }
    return NOP();
  }

  // The gradient network to be returned.
  auto net = std::make_shared<NetOp>();

  if (forwardOp.IsNetOp()) {
    // Because forwardOp is a net op, it can safely be static_cast to NetOp.
    auto& forwardNet = static_cast<const NetOp&>(forwardOp);

    // Map from an output gradient variable name to the indices, in the
    // backward net, of the operators that generate that variable.
    std::unordered_map<std::string, std::vector<size_t>> dup_output_ops;

    size_t local_op_id = 0;
    // Traverse forwardNet in reverse order.
    for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
         ++it, ++local_op_id) {
      auto fwd = *it;
      auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id);
      net->AddOp(bwd);
      for (auto& out : bwd->outputs_) {
        dup_output_ops[out].emplace_back(local_op_id);
      }
    }
    // Get a unique ID for this call.
    auto uid = uniq_id++;
    // TODO(dzh): more comment
    using Pos = std::pair<size_t, std::shared_ptr<OperatorBase>>;
    std::list<Pos> insert_position;
    for (auto& dup_output_op : dup_output_ops) {
      const std::string& name = dup_output_op.first;
      auto& dup_op = dup_output_op.second;
      if (dup_op.size() == 1) continue;
      std::vector<std::string> dup_outputs;

      for (size_t i = 0; i < dup_op.size(); ++i) {
        auto op_offset = dup_op[i];
        dup_outputs.push_back(name + "@RENAME@" + std::to_string(uid) + "@" +
                              std::to_string(i));
        net->ops_[op_offset]->Rename(name, dup_outputs.back());
      }
      insert_position.push_back(
          {dup_op.back(),
           OpRegistry::CreateOp(
               "add", {dup_outputs}, {name},
               {{"input_format",
                 std::vector<int>{0, static_cast<int>(dup_outputs.size())}}})});
    }

    insert_position.sort(
        [](const Pos& l, const Pos& r) { return l.first > r.first; });

    for (auto& pos : insert_position) {
      net->InsertOp(pos.first + 1, pos.second);
    }

  } else {
    std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp);
    for (std::string& grad_input : grad_op->inputs_) {
      if (no_grad_names.count(grad_input)) {
        std::string prefix = grad_input.substr(
            0, grad_input.size() - OperatorBase::GRAD_VAR_SUFFIX().size());
        grad_input = prefix + OperatorBase::ZERO_VAR_SUFFIX();

        // If part of this operator's input gradient is not calculated, feed a
        // zero-filled variable for that input gradient.
        net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {prefix},
                                        {grad_input}, {}));
      }
    }

    for (std::string& grad_output : grad_op->outputs_) {
      if (no_grad_names.count(grad_output)) {
        grad_output = OperatorBase::EMPTY_VAR_NAME();
      }
    }

    if (net->ops_.empty()) {  // No auxiliary op has been added to the network.
      return grad_op;
    }
    net->AddOp(grad_op);
  }
  net->type_ = "@GENERATED_BACKWARD@";
  net->CompleteAddOp();
  return net;
}

// See header for comments
std::shared_ptr<OperatorBase> Backward(
    const OperatorBase& forwardOp,
    const std::unordered_set<std::string>& no_grad_vars) {
  std::unordered_set<std::string> no_grad_names;
  no_grad_names.reserve(no_grad_vars.size());

  for (auto& name : no_grad_vars) {
    no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
  }
  size_t uid = 0;
  return BackwardRecursive(forwardOp, no_grad_names, uid);
}
}  // namespace framework
}  // namespace paddle
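To make the gradient de-duplication above concrete, here is a hand-worked trace (illustrative only, not part of the diff; the variable name "x@GRAD" and the uid value are made up):

// Suppose two backward ops in `net` both write "x@GRAD", and uid == 0.
// The renaming loop redirects the two producers to:
//   "x@GRAD@RENAME@0@0" and "x@GRAD@RENAME@0@1"
// and one "add" op is inserted immediately after the last producer:
//   OpRegistry::CreateOp("add",
//                        {"x@GRAD@RENAME@0@0", "x@GRAD@RENAME@0@1"},  // inputs
//                        {"x@GRAD"},                                  // output
//                        {{"input_format", std::vector<int>{0, 2}}});
// so the two partial gradients are summed back into "x@GRAD".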
@@ -0,0 +1,27 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <unordered_set>
#include "operator.h"
namespace paddle {
namespace framework {

// Create the backward operator from a forward operator.
// TODO(yuyang18): Add more API reference comment.
extern std::shared_ptr<OperatorBase> Backward(
    const OperatorBase& forwardOp,
    const std::unordered_set<std::string>& no_grad_vars);
}  // namespace framework
}  // namespace paddle
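Illustrative only (not part of the diff): a minimal sketch of calling the new Backward entry point. The "mul" operator type and the variable names are hypothetical; the CreateOp call simply follows the pattern used in backward.cc above.

// Build some registered forward op and ask for its backward pass.
auto fwd = OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {});
std::unordered_set<std::string> no_grad_vars = {"W"};  // do not compute W's gradient
auto bwd = Backward(*fwd, no_grad_vars);
// `bwd` is either a single gradient operator or a NetOp; gradient outputs of
// variables listed in no_grad_vars are replaced with the empty variable name.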
File diff suppressed because it is too large.
Binary file not shown (new image, 21 KiB).
Binary file not shown (new image, 28 KiB).
@@ -0,0 +1,96 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Projection.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
|
||||
* SliceProjection can slice the input value into multiple parts,
|
||||
* and then select some of them to merge into a new output.
|
||||
*
|
||||
* First, calculate the slices that need to be merged into the output.
|
||||
* slices = input.slices().for_output()
|
||||
*
|
||||
* Second, merge each slice into the output.
|
||||
* for(auto slice: slices) {
|
||||
* out.addAtOffset(slice, offset);
|
||||
* }
|
||||
*
|
||||
* Input slices as output: s0, s1, ...:
|
||||
* -----------------------
|
||||
* |///| |//////| |
|
||||
* |/s0| |//s1//| |
|
||||
* |///| |//////| |
|
||||
* -----------------------
|
||||
* Output, merge s0, s1, ... into one output:
|
||||
* ----------------
|
||||
* |///|//////| |
|
||||
* |/s0|//s1//|...|
|
||||
* |///|//////| |
|
||||
* ----------------
|
||||
*
|
||||
* The config file api is slice_projection.
|
||||
*/
|
||||
class SliceProjection : public Projection {
|
||||
public:
|
||||
SliceProjection(const ProjectionConfig& config,
|
||||
const ParameterPtr& parameter,
|
||||
bool useGpu);
|
||||
virtual void forward();
|
||||
virtual void backward(const UpdateCallback& callback);
|
||||
|
||||
protected:
|
||||
std::vector<std::pair<size_t, size_t>> slices_;
|
||||
};
|
||||
|
||||
REGISTER_PROJECTION(slice, SliceProjection);
|
||||
|
||||
/**
|
||||
* Constructed function.
|
||||
* @note SliceProjection should not have any parameter.
|
||||
*/
|
||||
SliceProjection::SliceProjection(const ProjectionConfig& config,
|
||||
const ParameterPtr& parameter,
|
||||
bool useGpu)
|
||||
: Projection(config, parameter, useGpu) {
|
||||
CHECK(!parameter) << "'slice' projection should not have any parameter";
|
||||
|
||||
slices_.reserve(config.slices_size());
|
||||
for (const auto& slice : config.slices()) {
|
||||
slices_.push_back(std::make_pair(slice.start(), slice.end()));
|
||||
}
|
||||
}
|
||||
|
||||
void SliceProjection::forward() {
|
||||
size_t offset = 0;
|
||||
for (auto& slice : slices_) {
|
||||
auto slice_out = in_->value->subColMatrix(slice.first, slice.second);
|
||||
out_->value->addAtOffset(*slice_out, offset);
|
||||
offset += slice_out->getWidth();
|
||||
}
|
||||
}
|
||||
|
||||
void SliceProjection::backward(const UpdateCallback& callback) {
|
||||
if (in_->grad) {
|
||||
size_t offset = 0;
|
||||
for (auto& slice : slices_) {
|
||||
auto slice_out = in_->grad->subColMatrix(slice.first, slice.second);
|
||||
slice_out->addAtOffset(*out_->grad, offset);
|
||||
offset += slice_out->getWidth();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace paddle
|
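A hand-worked illustration of the offset bookkeeping in SliceProjection::forward() (not part of the diff; the slice values are taken from the test config below):

// With slices_ = {(0, 4), (4, 12)} and an input value at least 12 columns wide:
//   step 0: slice_out = columns [0, 4)  -> added at offset 0;  offset becomes 4
//   step 1: slice_out = columns [4, 12) -> added at offset 4;  offset becomes 12
// The projection output width is therefore 4 + 8 = 12, the sum of the slice widths.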
@@ -0,0 +1,41 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

settings(batch_size=10)

data = data_layer(name="input", size=8*16*16)

conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
                       num_channels=8,
                       num_filters=16, stride=1,
                       bias_attr=False,
                       act=ReluActivation())
conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
                       num_channels=8,
                       num_filters=16, stride=1,
                       bias_attr=False,
                       act=ReluActivation())

proj1 = slice_projection(input=conv1, slices=[(0, 4), (4, 12)])

proj2 = slice_projection(input=conv2, slices=[(1, 5), (5, 15)])

concat = concat_layer(input=[proj1, proj2])

outputs(concat)
@@ -0,0 +1,41 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

settings(batch_size=10)

data = data_layer(name="input", size=8*16*16)

conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
                       num_channels=8,
                       num_filters=16, stride=1,
                       bias_attr=False,
                       act=ReluActivation())
conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
                       num_channels=8,
                       num_filters=16, stride=1,
                       bias_attr=False,
                       act=ReluActivation())

proj1 = slice_projection(input=conv1, slices=[(0, 12)])

proj2 = slice_projection(input=conv2, slices=[(1, 15)])

concat = concat_layer(input=[proj1, proj2])

outputs(concat)
Some files were not shown because too many files have changed in this diff.