Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_dilation

xzl 8 years ago
commit 47329f6bfc

.gitignore

@@ -28,3 +28,4 @@ cmake_install.cmake
paddle/.timestamp
python/paddlepaddle.egg-info/
paddle/pybind/pybind.h
python/paddle/v2/framework/tests/tmp/*

paddle/framework/CMakeLists.txt
@@ -15,7 +15,7 @@ nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc)
cc_library(scope SRCS scope.cc DEPS glog)
cc_test(scope_test SRCS scope_test.cc DEPS scope)

paddle/framework/op_desc.h
@@ -107,6 +107,8 @@ class OpDescBind {
void InferVarType(BlockDescBind *block) const;
void MarkAsTarget() { desc_.set_is_target(true); }
void Flush();
private:

paddle/framework/program_desc.cc
@@ -49,6 +49,13 @@ ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
}
}
ProgramDescBind::ProgramDescBind(const ProgramDesc &desc) {
desc_ = desc;
for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc));
}
}
ProgramDescBind::ProgramDescBind(const std::string &binary_str) {
PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
"Fail to parse program_desc from binary string.");

paddle/framework/program_desc.h
@@ -29,6 +29,8 @@ class ProgramDescBind {
public:
ProgramDescBind();
explicit ProgramDescBind(const ProgramDesc &desc);
ProgramDescBind(const ProgramDescBind &o);
explicit ProgramDescBind(const std::string &binary_str);

paddle/framework/prune.cc
@@ -46,7 +46,7 @@ bool IsTarget(const OpDesc& op_desc) {
return false;
}
void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
void prune_impl(const ProgramDesc& input, ProgramDesc* output, int block_id) {
// TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op
@@ -91,8 +91,8 @@ void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
// we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end());
output = input;
auto* op_field = output.mutable_blocks(block_id)->mutable_ops();
*output = input;
auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
op_field->Clear();
for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) {
@@ -101,7 +101,8 @@ void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
}
}
void Prune(const ProgramDesc& input, ProgramDesc& output) {
// TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies
void Prune(const ProgramDesc& input, ProgramDesc* output) {
prune_impl(input, output, 0);
}

paddle/framework/prune.h
@@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
void Prune(const ProgramDesc& input, ProgramDesc& output);
void Prune(const ProgramDesc& input, ProgramDesc* output);
} // namespace framework
} // namespace paddle

paddle/framework/prune_test.cc
@@ -59,11 +59,11 @@ TEST(Prune, one_operator) {
f::ProgramDesc *pdesc = program.Proto();
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 0);
pdesc->mutable_blocks(0)->mutable_ops(0)->set_is_target(true);
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 1);
}
@@ -81,7 +81,7 @@ TEST(Prune, forward) {
for (int i = 0; i < pdesc->blocks(0).ops_size(); ++i) {
f::ProgramDesc pruned;
pdesc->mutable_blocks(0)->mutable_ops(i)->set_is_target(true);
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), i + 1);
}
}
@@ -100,7 +100,7 @@ TEST(Prune, multi_input_op) {
pdesc->mutable_blocks(0)->mutable_ops(3)->set_is_target(true);
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 4);
}
@@ -116,7 +116,7 @@ TEST(Prune, multi_output_op) {
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 2);
}
@@ -133,6 +133,6 @@ TEST(Prune, multi_target) {
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 3);
}

paddle/framework/scope.cc
@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory> // for unique_ptr
#include <mutex> // for call_once
#include "glog/logging.h"
#include "paddle/string/printf.h"
namespace paddle {
@@ -23,7 +24,10 @@ namespace framework {
Scope::~Scope() {
DropKids();
for (auto& kv : vars_) delete kv.second;
for (auto& kv : vars_) {
VLOG(3) << "Destroy variable " << kv.first;
delete kv.second;
}
}
Scope& Scope::NewScope() const {
@@ -38,6 +42,7 @@ Variable* Scope::Var(const std::string& name) {
}
Variable* v = new Variable();
vars_[name] = v;
VLOG(3) << "Create variable " << name << " on scope";
v->name_ = &(vars_.find(name)->first);
return v;
}

paddle/memory/CMakeLists.txt
@@ -1,6 +1,6 @@
add_subdirectory(detail)
cc_library(memory SRCS memory.cc)
cc_library(memory SRCS memory.cc DEPS place)
cc_library(memcpy SRCS memcpy.cc)
cc_library(paddle_memory

paddle/memory/detail/meta_cache.cc
@@ -13,6 +13,7 @@
limitations under the License. */
#include "paddle/memory/detail/meta_cache.h"
#include "glog/logging.h"
#include "paddle/memory/detail/memory_block.h"
#include "paddle/platform/assert.h"
@@ -28,7 +29,9 @@ Metadata MetadataCache::load(const MemoryBlock* block) {
PADDLE_ASSERT(existing_metadata->second.check_guards());
return existing_metadata->second;
} else {
PADDLE_ASSERT(reinterpret_cast<const Metadata*>(block)->check_guards());
auto* meta = reinterpret_cast<const Metadata*>(block);
VLOG(3) << "Load MetaData type=" << meta->type;
PADDLE_ASSERT(meta->check_guards());
return *reinterpret_cast<const Metadata*>(block);
}
}

paddle/memory/memory.cc
@@ -39,11 +39,15 @@ BuddyAllocator* GetCPUBuddyAllocator() {
template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
return GetCPUBuddyAllocator()->Alloc(size);
VLOG(3) << "Allocate " << size << " bytes on " << platform::Place(place);
void* p = GetCPUBuddyAllocator()->Alloc(size);
VLOG(3) << " pointer=" << p;
return p;
}
template <>
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
VLOG(3) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p);
}

paddle/operators/batch_norm_op.cu
@@ -117,9 +117,6 @@ class BatchNormKernel<platform::GPUPlace, T> : public framework::OpKernel<T> {
math::SetConstant<platform::GPUPlace, T> functor;
functor(ctx.device_context(), saved_mean, 0);
functor(ctx.device_context(), saved_variance, 0);
// FIXME(qiao) should not set zero self
functor(ctx.device_context(), mean_out, 0);
functor(ctx.device_context(), variance_out, 0);
auto handle = ctx.cuda_device_context().cudnn_handle();
@@ -211,8 +208,15 @@ class BatchNormGradKernel<platform::GPUPlace, T>
mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif
std::vector<int> dims = {N, C, H, W, D};
std::vector<int> strides = {H * W * C * D, 1, W * D * C, D * C, C};
std::vector<int> dims;
std::vector<int> strides;
if (tensor_format == TensorFormat::NCHW) {
dims = {N, C, H, W, D};
strides = {C * H * W * D, H * W * D, W * D, D, 1};
} else {
dims = {N, C, H, W, D};
strides = {H * W * C * D, 1, W * D * C, D * C, C};
}
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
data_desc_, CudnnDataType<T>::type,
x_dims.size() > 3 ? x_dims.size() : 4, dims.data(), strides.data()));
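
A note on the stride fix above: for cudnnSetTensorNdDescriptor, strides[i] is the distance in elements between consecutive indices along dimension i, so NCHW and NHWC storage need different vectors even though dims stays {N, C, H, W, D}. A standalone Python sketch of what the two vectors mean (illustration only; names and sizes are not from the diff):

def flat_offset(index, strides):
    # linear-memory offset of a logical (n, c, h, w, d) index
    return sum(i * s for i, s in zip(index, strides))

N, C, H, W, D = 2, 3, 4, 5, 1
nchw = [C * H * W * D, H * W * D, W * D, D, 1]   # channels outermost
nhwc = [H * W * C * D, 1, W * D * C, D * C, C]   # channels innermost
print(flat_offset((0, 1, 0, 0, 0), nchw))  # 20: one channel step jumps H*W*D
print(flat_offset((0, 1, 0, 0, 0), nhwc))  # 1:  channels are contiguous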

paddle/operators/sequence_pool_op.h
@@ -144,11 +144,11 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
in_t_map(in_t.data<T>(), h, w);
int row_id;
Eigen::array<int, 2> extents = {1, 1};
Eigen::array<int, 2> extents{{1, 1}};
for (int col_id = 0; col_id < w; col_id++) {
in_t_map.col(col_id).maxCoeff(&row_id);
Eigen::array<int, 2> in_offsets = {row_id, col_id};
Eigen::array<int, 2> out_offsets = {0, col_id};
Eigen::array<int, 2> in_offsets{{row_id, col_id}};
Eigen::array<int, 2> out_offsets{{0, col_id}};
in_g_e.slice(in_offsets, extents).device(place) =
out_g_e.slice(out_offsets, extents);
}

paddle/operators/sign_op.cc
@@ -0,0 +1,70 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sign_op.h"
namespace paddle {
namespace operators {
class SignOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SignOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SignOp should not be null.");
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Out");
}
};
template <typename AttrType>
class SignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SignOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor of sign operator.");
AddOutput("Out", "(Tensor) Output tensor of sign operator.");
AddComment(R"DOC(Sign operator
The equation is: Out = X.sign()
)DOC");
}
};
class SignGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
grad_op->SetType("scale");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("scale", 0.0f);
return std::unique_ptr<framework::OpDescBind>(grad_op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(sign, ops::SignOp, ops::SignOpMaker<float>,
ops::SignGradMaker);
REGISTER_OP_CPU_KERNEL(sign,
ops::SignKernel<paddle::platform::CPUPlace, float>);
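
On SignGradMaker above: sign(x) is piecewise constant, so its derivative is zero almost everywhere, and the backward pass can be encoded as a scale op with scale = 0.0, producing a zero dX shaped like dOut. A standalone NumPy sketch of the semantics (illustration only, not Paddle API):

import numpy as np

x = np.array([-1.5, 0.0, 2.0], dtype=np.float32)
out = np.sign(x)           # forward:  [-1.,  0.,  1.]
d_out = np.ones_like(x)    # some incoming gradient
d_x = 0.0 * d_out          # backward: exactly what scale(0.0) computes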

paddle/operators/sign_op.cu
@@ -0,0 +1,18 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sign_op.h"
REGISTER_OP_GPU_KERNEL(
sign, paddle::operators::SignKernel<paddle::platform::GPUPlace, float>);

paddle/operators/sign_op.h
@@ -0,0 +1,38 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class SignKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& context) const {
auto* out = context.Output<framework::Tensor>("Out");
auto* in = context.Input<framework::Tensor>("X");
out->mutable_data<T>(in->place());
auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*in);
auto& place = context.GetEigenDevice<Place>();
eigen_out.device(place) = eigen_in.sign();
}
};
} // namespace operators
} // namespace paddle

paddle/pybind/CMakeLists.txt
@@ -1,7 +1,7 @@
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc tensor_array paddle_memory executor
DEPS pybind python backward proto_desc tensor_array paddle_memory executor prune
${GLOB_OP_LIB})
endif(WITH_PYTHON)

paddle/pybind/protobuf.cc
@@ -141,6 +141,13 @@ void BindProgramDesc(py::module &m) {
desc->SerializeToString(&res),
"Serialize ProgramDesc Error. This could be a bug of Paddle.");
return res;
})
.def("parse_from_string",
[](ProgramDescBind &program_desc, const std::string &data) {
ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->ParseFromString(data),
"Fail to parse ProgramDesc from string. This could "
"be a bug of Paddle.");
});
}

paddle/pybind/pybind.cc
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/framework/feed_fetch_method.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/prune.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/tensor_array.h"
#include "paddle/operators/cond_op.h"
@@ -237,6 +238,16 @@ All parameter, weight, gradient are variables in Paddle.
}
return ret_values;
});
m.def("prune", [](const ProgramDescBind &origin,
const std::vector<std::array<size_t, 2>> &targets) {
ProgramDescBind prog_with_targets(origin);
for (const auto &t : targets) {
prog_with_targets.Block(t[0])->Op(t[1])->MarkAsTarget();
}
ProgramDesc pruned_desc;
Prune(*prog_with_targets.Proto(), &pruned_desc);
return new ProgramDescBind(pruned_desc);
});
m.def_submodule(
"var_names",
"The module will return special predefined variable name in Paddle")

python/paddle/v2/framework/framework.py
@@ -251,6 +251,8 @@ class Operator(object):
self.desc.set_output(out_proto.name, out_argu_names)
if attrs is not None:
if not isinstance(attrs, dict):
raise TypeError("'attrs' should be a dict.")
for attr in proto.attrs:
attr_name = attr.name
if (not attr_name in attrs) or (attrs[attr_name] is None):
@@ -291,6 +293,14 @@ class Operator(object):
def output_names(self):
return self.desc.output_names()
@property
def idx(self):
for i, op in enumerate(self.block.ops):
if op == self:
return i
raise ValueError(
"Can't find op itself in its block. It could be a bug of Paddle.")
def has_attr(self, name):
return self.desc.has_attr(name)
@@ -440,10 +450,31 @@ class Program(object):
p.sync_with_cpp()
return p
def prune(self, targets):
if not isinstance(targets, list):
targets = [targets]
targets_idx = []
for t in targets:
if not isinstance(t, Operator):
if isinstance(t, Variable):
t = t.op
else:
raise ValueError(
"All targets of prune() can only be Variable or Operator."
)
targets_idx.append([t.block.idx, t.idx])
res = Program()
res.desc = core.prune(self.desc, targets_idx)
res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())]
res.sync_with_cpp()
return res
@staticmethod
def parse_from_string(binary_str):
p = Program()
p.desc = core.ProgramDesc(binary_str)
p.blocks = [Block(p, i) for i in xrange(p.desc.num_blocks())]
p.sync_with_cpp()
return p
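
Taken together, the two new Program methods support a prune/serialize/deserialize round trip. A hedged usage sketch (`program` and `prediction` are hypothetical objects from a previously built network):

inference_program = program.prune([prediction])   # Variables and/or Operators
binary_str = inference_program.desc.serialize_to_string()
clone = Program.parse_from_string(binary_str)     # rebuilt from the proto bytes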

python/paddle/v2/framework/io.py
@@ -1,11 +1,12 @@
import os
import cPickle as pickle
from paddle.v2.framework.framework import Program, Parameter, g_program, \
Variable
__all__ = [
'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
'load_persistables'
'load_persistables', "save_inference_model", "load_inference_model"
]
@@ -124,6 +125,7 @@ def load_vars(executor, dirname, program=None, vars=None, predicate=None):
inputs={},
outputs={"Out": [new_var]},
attrs={'file_path': os.path.join(dirname, new_var.name)})
executor.run(load_prog)
@@ -141,3 +143,88 @@ def load_persistables(executor, dirname, program=None):
"""
load_vars(
executor, dirname=dirname, program=program, predicate=is_persistable)
def save_inference_model(dirname,
feeded_var_names,
target_vars,
executor,
program=None):
"""
Build a program specialized for inference,
and save it to the given directory via the executor.
:param dirname: directory path
:param feeded_var_names: names of variables that need to be fed data during inference
:param target_vars: Variables from which we can get inference results.
:param executor: executor that saves the inference model
:param program: original program, which will be pruned to build the inference model.
Default g_program.
:return: None
"""
if program is None:
program = g_program
if not isinstance(target_vars, list):
target_vars = [target_vars]
if not os.path.isdir(dirname):
os.makedirs(dirname)
pruned_program = program.prune(target_vars)
fetch_var_names = [v.name for v in target_vars]
model_file_name = dirname + "/__model__"
with open(model_file_name, "w") as f:
pickle.dump({
"program_desc_str": pruned_program.desc.serialize_to_string(),
"feed_var_names": feeded_var_names,
"fetch_var_names": fetch_var_names
}, f, -1)
save_params(executor, dirname, program)
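
A hedged usage sketch of save_inference_model (`exe` and `prediction` are hypothetical objects, and the feed name "x" is made up): it prunes the default program down to `prediction`, pickles the program bytes plus feed/fetch names into <dirname>/__model__, and saves the parameters alongside.

save_inference_model("./model_dir", ["x"], [prediction], exe)
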
def load_persistables_if_exist(executor, dirname, program=None):
filenames = next(os.walk(dirname))[2]
filenames = set(filenames)
def _is_persistable_and_exist_(var):
if not is_persistable(var):
return False
else:
return var.name in filenames
load_vars(
executor,
dirname,
program=program,
vars=None,
predicate=_is_persistable_and_exist_)
def load_inference_model(dirname, executor):
"""
Load inference model from a directory
:param dirname: directory path
:param executor: executor that loads the inference model
:return: [program, feed_var_names, fetch_vars]
program: program specialized for inference.
feed_var_names: names of variables that need to be fed data
fetch_vars: variables from which we can get inference results.
"""
if not os.path.isdir(dirname):
raise ValueError("There is no directory named '%s'", dirname)
model_file_name = dirname + "/__model__"
model = pickle.load(open(model_file_name, "r"))
program_desc_str = model["program_desc_str"]
feed_var_names = model["feed_var_names"]
fetch_var_names = model["fetch_var_names"]
program = Program.parse_from_string(program_desc_str)
load_persistables_if_exist(executor, dirname, program)
fetch_vars = [program.global_block().var(name) for name in fetch_var_names]
return [program, feed_var_names, fetch_vars]
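
And the counterpart, restoring what the save_inference_model sketch above persisted (again with hypothetical names; feed/run plumbing elided):

[infer_prog, feed_names, fetch_vars] = load_inference_model("./model_dir", exe)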

python/paddle/v2/framework/layer_helper.py
@@ -131,12 +131,14 @@ class LayerHelper(object):
return dtype
def create_parameter(self, attr, shape, dtype, suffix='w'):
if attr['name'] is None:
attr['name'] = unique_name(".".join([self.name, suffix]))
# Deepcopy the attr so that parameters can be shared in program
attr_copy = copy.deepcopy(attr)
if attr_copy['name'] is None:
attr_copy['name'] = unique_name(".".join([self.name, suffix]))
self.init_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr)
dtype=dtype, shape=shape, **attr_copy)
return self.program.global_block().create_parameter(
name=attr['name'], dtype=dtype, shape=shape)
name=attr_copy['name'], dtype=dtype, shape=shape)
def create_tmp_variable(self, dtype):
return self.program.current_block().create_var(

python/paddle/v2/framework/optimizer.py
@@ -18,7 +18,8 @@ class Optimizer(object):
but need to use one of its implementations.
"""
def __init__(self):
def __init__(self, global_step=None):
self._global_step = global_step
# Dictionary of accumulators. Some optimizer subclasses need to
# allocate and manage extra variables associated with the parameters
# to train. These variables are called accumulators.
@@ -109,6 +110,26 @@ class Optimizer(object):
format(name, param.name))
return self._accumulators[name][param.name]
def _increment_global_step(self, block):
"""Increment the global step by 1 after every iteration
Args:
block: the block in which the loss variable is present
Returns:
the increment op for the global step
"""
assert isinstance(block, framework.Block)
assert self._global_step is not None
# create the increment op
increment_op = block.append_op(
type="increment",
inputs={"X": self._global_step},
outputs={"Out": self._global_step},
attrs={"step": 1.0})
return increment_op
def create_optimization_pass(self, parameters_and_grads, loss):
"""Add optimization operators to update gradients to variables.
@@ -152,6 +173,8 @@ class Optimizer(object):
if finish_ops is not None:
return_ops += finish_ops
if self._global_step is not None:
return_ops.append(self._increment_global_step(loss.block))
return return_ops
def minimize(self, loss, parameter_list=None, no_grad_set=None):
@@ -172,9 +195,9 @@ class SGDOptimizer(Optimizer):
""" Simple SGD optimizer without any state.
"""
def __init__(self, learning_rate):
def __init__(self, learning_rate, global_step=None):
assert learning_rate is not None
super(SGDOptimizer, self).__init__()
super(SGDOptimizer, self).__init__(global_step)
self.type = "sgd"
self._learning_rate = learning_rate
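
A hedged sketch of the new global_step plumbing (the variable name and `loss` are hypothetical): minimize() now appends an increment op with step = 1.0 after the parameter updates.

from paddle.v2.framework.framework import g_program
from paddle.v2.framework.optimizer import SGDOptimizer

# counter lives in the global block so every iteration updates the same var
counter = g_program.global_block().create_var(
    name="global_step", shape=[1], dtype="float32")
sgd = SGDOptimizer(learning_rate=0.01, global_step=counter)
opts = sgd.minimize(loss)  # last op in opts increments `counter` by 1
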
@@ -215,10 +238,14 @@ class MomentumOptimizer(Optimizer):
"""
_velocity_acc_str = "velocity"
def __init__(self, learning_rate, momentum, use_nesterov=False):
def __init__(self,
learning_rate,
momentum,
use_nesterov=False,
global_step=None):
assert learning_rate is not None
assert momentum is not None
super(MomentumOptimizer, self).__init__()
super(MomentumOptimizer, self).__init__(global_step)
self.type = "momentum"
self._learning_rate = learning_rate
self._momentum = momentum
@@ -275,10 +302,10 @@ class AdagradOptimizer(Optimizer):
"""
_moment_acc_str = "moment"
def __init__(self, learning_rate, epsilon=1.0e-6):
def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
assert learning_rate is not None
assert epsilon is not None
super(AdagradOptimizer, self).__init__()
super(AdagradOptimizer, self).__init__(global_step)
self.type = "adagrad"
self._learning_rate = learning_rate
self._epsilon = epsilon
@@ -337,12 +364,13 @@ class AdamOptimizer(Optimizer):
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
epsilon=1e-8,
global_step=None):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
super(AdamOptimizer, self).__init__()
super(AdamOptimizer, self).__init__(global_step)
self.type = "adam"
self._learning_rate = learning_rate
self._beta1 = beta1
@@ -458,7 +486,8 @@ class AdamaxOptimizer(Optimizer):
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
epsilon=1e-8,
global_step=None):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None

python/paddle/v2/framework/regularizer.py
@@ -1,6 +1,8 @@
import paddle.v2.framework.framework as framework
__all__ = ['append_regularization_ops', 'L2DecayRegularizer']
__all__ = [
'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer'
]
def append_regularization_ops(parameters_and_grads):
@@ -97,3 +99,43 @@ class L2DecayRegularizer(WeightDecayRegularizer):
attrs={"scale": self._regularization_coeff})
return decay
class L1DecayRegularizer(WeightDecayRegularizer):
"""Implements the L1 Weight Decay Regularization
"""
def __init__(self, regularization_coeff=0.0):
assert regularization_coeff is not None
super(L1DecayRegularizer, self).__init__()
self._regularization_coeff = regularization_coeff
def __call__(self, param, block):
"""Add L1 weight decay ops to network
Adds L1 weight decay ops.
L1WeightDecay = reg_coeff * sign(parameter)
Args:
param: parameter variable for which regularization is applied
block: block in which variable is to be created
Returns:
new variable for weight decay
"""
assert isinstance(param, framework.Parameter)
assert isinstance(block, framework.Block)
decay = block.create_var(
dtype="float32", shape=param.shape, lod_level=param.lod_level)
# Append sign op
block.append_op(
type='sign', inputs={"X": param}, outputs={"Out": decay})
# Append scale op to the output of sign op
block.append_op(
type='scale',
inputs={"X": decay},
outputs={"Out": decay},
attrs={"scale": self._regularization_coeff})
return decay
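
A standalone NumPy check of the quantity the two appended ops compute, decay = reg_coeff * sign(param) (illustration only):

import numpy as np

param = np.array([0.5, -2.0, 0.0], dtype=np.float32)
reg_coeff = 0.1
decay = reg_coeff * np.sign(param)   # [ 0.1, -0.1,  0. ]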

Some files were not shown because too many files have changed in this diff.