feature/while_grad_op (#5554)

* first commit * Python API for while op * Python Unittest for simple while_op forward * fix out to be list * Fix UT * VarType * Fix several bugs * Fix bug * Fix bug * Fix Bug * Fix bug * Fix unittest * Remove debug log * Add comments * add PADDLE_ENFORCE * while_grad_op first commit * Add `BlockDescBind::FindRecursiveOrCreateVar()` and fix bugs * not sure how to setdim of while outputs * push for test * add executor vlog * fix bug of while_op cond * Several enhancements to the code 1. Backward always infers shape & var type, because RENAME variables are created when creating backward operators, but their shapes & var types were not being inferred. 2. Never use SomePtr-> directly, since every pointer could be nullptr if it is a function return value. Add `detail::Ref` to cast a pointer to a reference safely. 3. Enhance error messages for backward. 4. Infer the data type of variables in `sum` and `tensor_write` * Fix bugs of while_op gradient * Fix several bugs of while_op grad * fix fill zeros like * fix 3 >= 3 * fix place holder shouldn't be null * fail on sum op * Fix SumOp of TensorList * clean up * pass while test * fix test_array_write_read * pass sum op * Support int/int64 for fill_constant_batch_size_like * Fix compile
8 years ago · 18f0c40a97
parent 08bc08d64e
commit 18f0c40a97
23 changed files with 378 additions and 104 deletions
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@ -270,6 +270,19 @@ static bool AllGradInSet(const std::vector<std::string>& names,
      return false;
    }
  }
  if (VLOG_IS_ON(10)) {
    std::ostringstream sout;
    sout << "All input {";
    for (auto& name : names) {
      sout << name << ",";
    }
    sout << "} is in {";
    for (auto& name : set) {
      sout << name << ",";
    }
    sout << "}";
    VLOG(10) << sout.str();
  }
  return true;
 }
@ -290,14 +303,12 @@ static void CreateGradVarInBlock(
  auto ops = block_desc->AllOps();
  for (size_t op_index = grad_op_start_index; op_index < ops.size();
       ++op_index) {
    bool need_infer_shape = false;
    std::unordered_set<std::string> new_vars;
    ForEachVarName(ops[op_index]->Outputs(),
                   [&](const std::string& grad_var_name) {
                     if (block_desc->HasVar(grad_var_name)) {
                       return false;
                     }
                     need_infer_shape = true;
                     auto var = block_desc->Var(grad_var_name);
                     new_vars.insert(var->Name());
                     auto it = param_name_map.find(grad_var_name);
@ -311,7 +322,6 @@ static void CreateGradVarInBlock(
                     grad_record.op_idx_ = static_cast<int>(op_index);
                     return false; /* not break */
                   });
    if (need_infer_shape) {
    ops[op_index]->InferVarType(block_desc);
    for (auto& arg : ops[op_index]->OutputArgumentNames()) {
      if (new_vars.find(arg) == new_vars.end()) {
@ -329,7 +339,6 @@ static void CreateGradVarInBlock(
    ops[op_index]->InferShape(*block_desc);
  }
 }
 }
 std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
    const OpDescBind* op_desc, std::unordered_set<std::string>* no_grad_vars,
@ -387,6 +396,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
    ProgramDescBind& program_desc, int block_idx,
    std::unordered_set<std::string>* no_grad_vars,
    std::unordered_map<std::string, std::string>* grad_to_var) {
  VLOG(5) << "MakeBlockBackward";
  BlockDescBind* cur_block = program_desc.MutableBlock(block_idx);
  std::vector<OpDescBind*> op_descs = cur_block->AllOps();
  std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
@ -394,9 +404,10 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
  std::vector<std::unique_ptr<OpDescBind>> backward_descs;
  for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
    VLOG(5) << "Making backward " << (*it)->Type() << " op";
    std::vector<std::unique_ptr<OpDescBind>> op_grads;
-    if ((*it)->Type() == "recurrent") {
+    if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") {
      int step_block_idx = (*it)->GetBlockAttr("step_block");
      BlockDescBind* backward_block = CreateStepBlock(
          program_desc, no_grad_vars, grad_to_var, step_block_idx);
@ -410,6 +421,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
      op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var);
    }
    if (VLOG_IS_ON(10)) {
      std::ostringstream sout;
      sout << "Made ";
      for (auto& op_grad : op_grads) {
        sout << op_grad->Type() << " ";
      }
      VLOG(10) << sout.str();
    }
    for (const auto& desc : op_grads) {
      for (const std::string& out_name : desc->OutputArgumentNames()) {
        if (out_name.find("@GRAD") == std::string::npos) {
@ -425,6 +445,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
        op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs),
        [](std::unique_ptr<OpDescBind>& ptr) { return std::move(ptr); });
  }
  VLOG(5) << "Appending Sums";
  // Check whether some variables are written more than once
  std::list<std::pair<size_t, std::unique_ptr<OpDescBind>>> pending_sum_ops;
  for (const auto& dup : dup_out_ops) {
@ -432,16 +454,22 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
    const std::vector<size_t> dup_op = dup.second;
    if (out_name != kEmptyVarName && dup_op.size() > 1) {
      std::vector<std::string> sum_op_inputs;
      std::string next_g_name = out_name;
      for (size_t i = 0; i < dup_op.size(); ++i) {
        VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name
                 << " duplicated";
        std::string new_name = out_name + "@RENAME@" + std::to_string(i);
-        backward_descs[dup_op[i]]->Rename(out_name, new_name);
+        backward_descs[dup_op[i]]->RenameOutput(out_name, new_name);
        backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name);
        sum_op_inputs.emplace_back(new_name);
        next_g_name = sum_op_inputs.back();
      }
      std::unique_ptr<OpDescBind> sum_op(new OpDescBind(
          "sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}}, {}));
      pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
    }
  }
  pending_sum_ops.sort(
      [](const std::pair<size_t, std::unique_ptr<OpDescBind>>& a,
         const std::pair<size_t, std::unique_ptr<OpDescBind>>& b) {
@ -452,6 +480,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
                          std::move(p.second));
  }
  VLOG(5) << "MakeBlockBackward Finished";
  return backward_descs;
 }
--- a/paddle/framework/data_type.h
+++ b/paddle/framework/data_type.h
@ -29,6 +29,8 @@ inline DataType ToDataType(std::type_index type) {
    return DataType::INT32;
  } else if (typeid(int64_t).hash_code() == type.hash_code()) {
    return DataType::INT64;
  } else if (typeid(bool).hash_code() == type.hash_code()) {
    return DataType::BOOL;
  } else {
    PADDLE_THROW("Not supported");
  }
--- a/paddle/framework/ddim.cc
+++ b/paddle/framework/ddim.cc
@ -60,8 +60,7 @@ void make_ddim(DDim& ddim, const int64_t* dims, int n) {
      ddim = make_dim<9>(dims);
      break;
    default:
-      throw std::invalid_argument(
+      PADDLE_THROW("Dynamic dimensions must have between [1, 9] dimensions.");
          "Dynamic dimensions must have between [1, 9] dimensions.");
  }
 }
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@ -120,6 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
  for (auto& op_desc : block.AllOps()) {
    auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
    VLOG(10) << op->DebugString();
    op->Run(*local_scope, *device);
  }
  if (create_local_scope) {
--- a/paddle/framework/op_desc.cc
+++ b/paddle/framework/op_desc.cc
@ -235,6 +235,23 @@ void OpDescBind::Rename(const std::string &old_name,
  need_update_ = true;
 }
 void OpDescBind::RenameOutput(const std::string &old_name,
                              const std::string &new_name) {
  for (auto &output : outputs_) {
    std::replace(output.second.begin(), output.second.end(), old_name,
                 new_name);
  }
  need_update_ = true;
 }
 void OpDescBind::RenameInput(const std::string &old_name,
                             const std::string &new_name) {
  for (auto &input : inputs_) {
    std::replace(input.second.begin(), input.second.end(), old_name, new_name);
  }
  need_update_ = true;
 }
 struct SetAttrDescVisitor : public boost::static_visitor<void> {
  explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {}
  mutable OpDesc::Attr *attr_;
@ -448,7 +465,12 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
 // Returns the dimensions of variable `name`, built from the shape stored in
 // its description. The variable is looked up with `FindVarRecursive` on
 // `block_`; PADDLE_ENFORCE fails if it cannot be found. Any exception raised
 // while building the DDim is logged at VLOG(5) and then propagated unchanged.
 DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
  auto var = block_.FindVarRecursive(name);
  PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name);
  try {
    return framework::make_ddim(var->Shape());
  } catch (...) {
    // Record which variable failed before handing the error to the caller.
    VLOG(5) << "GetDim of variable " << name << " error";
    // A bare `throw;` rethrows the exception currently being handled.
    throw;
  }
 }
 void CompileTimeInferShapeContext::SetDim(const std::string &name,
--- a/paddle/framework/op_desc.h
+++ b/paddle/framework/op_desc.h
@ -73,6 +73,10 @@ class OpDescBind {
  void Rename(const std::string &old_name, const std::string &new_name);
  void RenameOutput(const std::string &old_name, const std::string &new_name);
  void RenameInput(const std::string &old_name, const std::string &new_name);
  // Only be used in C++
  const AttributeMap &GetAttrMap() const;
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@ -403,19 +403,6 @@ class RuntimeInferShapeContext : public InferShapeContext {
 void OperatorWithKernel::Run(const Scope& scope,
                             const platform::DeviceContext& dev_ctx) const {
  if (VLOG_IS_ON(1)) {
    auto inputs = this->InputVars();
    auto outputs = this->OutputVars(true);
    std::ostringstream sout;
    sout << "Run operator " << this->Type() << " From [";
    std::ostream_iterator<std::string> out_it(sout, ",");
    std::copy(inputs.begin(), inputs.end(), out_it);
    sout << "] to [";
    std::copy(outputs.begin(), outputs.end(), out_it);
    sout << "]";
    VLOG(1) << sout.str();
  }
  RuntimeInferShapeContext infer_shape_ctx(*this, scope);
  this->InferShape(&infer_shape_ctx);
--- a/paddle/framework/scope.cc
+++ b/paddle/framework/scope.cc
@ -38,11 +38,12 @@ Scope& Scope::NewScope() const {
 Variable* Scope::Var(const std::string& name) {
  auto iter = vars_.find(name);
  if (iter != vars_.end()) {
    VLOG(3) << "Get existing variable " << name;
    return iter->second;
  }
  Variable* v = new Variable();
  vars_[name] = v;
-  VLOG(3) << "Create variable " << name << " on scope";
+  VLOG(3) << "Create variable " << name;
  v->name_ = &(vars_.find(name)->first);
  return v;
 }
--- a/paddle/framework/shape_inference.h
+++ b/paddle/framework/shape_inference.h
@ -53,6 +53,10 @@ class InferShapeContext {
  virtual bool IsRuntime() const = 0;
  // Note: In while op, we need this to be public
  void SetDims(const std::vector<std::string> &names,
               const std::vector<framework::DDim> &dims);
 protected:
  virtual framework::DDim GetDim(const std::string &name) const = 0;
  virtual void SetDim(const std::string &name, const framework::DDim &dim) = 0;
@ -60,9 +64,6 @@ class InferShapeContext {
  std::vector<framework::DDim> GetDims(
      const std::vector<std::string> &names) const;
  void SetDims(const std::vector<std::string> &names,
               const std::vector<framework::DDim> &dims);
  std::vector<VarDesc::VarType> GetVarTypes(
      const std::vector<std::string> &names) const;
--- a/paddle/operators/array_operator.h
+++ b/paddle/operators/array_operator.h
@ -42,6 +42,7 @@ class ArrayOp : public framework::OperatorBase {
    } else {
      offset = static_cast<size_t>(*i_tensor.data<int64_t>());
    }
    VLOG(10) << " Offset = " << offset;
    return offset;
  }
 };
--- a/paddle/operators/detail/safe_ref.h
+++ b/paddle/operators/detail/safe_ref.h
@ -0,0 +1,31 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #pragma once
 namespace paddle {
 namespace operators {
 namespace detail {
 /**
 * Convert a pointer into a reference, checking it first.
 *
 * `ptr != nullptr` is checked with PADDLE_ENFORCE before the pointer is
 * dereferenced. `args` are passed through to PADDLE_ENFORCE as the error
 * message: a printf-style format string followed by its arguments. `args`
 * may be empty.
 *
 * NOTE(review): this header uses PADDLE_ENFORCE but no include for it is
 * visible here; presumably the including file must provide it -- confirm.
 */
 template <typename T, typename... ARGS>
 inline T &Ref(T *ptr, ARGS &&... args) {
  PADDLE_ENFORCE(ptr != nullptr, args...);
  return *ptr;
 }
 }  // namespace detail
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/operators/fill_constant_batch_size_like_op.cc
+++ b/paddle/operators/fill_constant_batch_size_like_op.cc
@ -101,4 +101,7 @@ REGISTER_OPERATOR(fill_constant_batch_size_like,
 REGISTER_OP_CPU_KERNEL(
    fill_constant_batch_size_like,
    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUPlace, float>,
-    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUPlace, double>);
+    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUPlace, double>,
    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUPlace, int>,
    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUPlace,
                                           int64_t>);
--- a/paddle/operators/fill_constant_batch_size_like_op.cu.cc
+++ b/paddle/operators/fill_constant_batch_size_like_op.cu.cc
@ -19,4 +19,7 @@ namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
    fill_constant_batch_size_like,
    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::GPUPlace, float>,
-    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::GPUPlace, double>);
+    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::GPUPlace, double>,
    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::GPUPlace, int>,
    ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::GPUPlace,
                                           int64_t>);
--- a/paddle/operators/fill_zeros_like_op.cc
+++ b/paddle/operators/fill_zeros_like_op.cc
@ -54,5 +54,8 @@ namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, ops::FillZerosLikeOp,
                             ops::FillZerosLikeOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    fill_zeros_like,
+    fill_zeros_like, ops::FillZerosLikeKernel<paddle::platform::CPUPlace, int>,
-    ops::FillZerosLikeKernel<paddle::platform::CPUPlace, float>);
+    ops::FillZerosLikeKernel<paddle::platform::CPUPlace, int64_t>,
    ops::FillZerosLikeKernel<paddle::platform::CPUPlace, float>,
    ops::FillZerosLikeKernel<paddle::platform::CPUPlace, double>,
    ops::FillZerosLikeKernel<paddle::platform::CPUPlace, bool>);
--- a/paddle/operators/fill_zeros_like_op.cu.cc
+++ b/paddle/operators/fill_zeros_like_op.cu.cc
@ -17,5 +17,8 @@
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
-    fill_zeros_like,
+    fill_zeros_like, ops::FillZerosLikeKernel<paddle::platform::GPUPlace, int>,
-    ops::FillZerosLikeKernel<paddle::platform::GPUPlace, float>);
+    ops::FillZerosLikeKernel<paddle::platform::GPUPlace, int64_t>,
    ops::FillZerosLikeKernel<paddle::platform::GPUPlace, float>,
    ops::FillZerosLikeKernel<paddle::platform::GPUPlace, double>,
    ops::FillZerosLikeKernel<paddle::platform::GPUPlace, bool>);
--- a/paddle/operators/math/math_function.cc
+++ b/paddle/operators/math/math_function.cc
@ -250,6 +250,8 @@ void axpy<platform::CPUPlace, double>(const platform::DeviceContext& context,
 template struct SetConstant<platform::CPUPlace, float>;
 template struct SetConstant<platform::CPUPlace, double>;
 template struct SetConstant<platform::CPUPlace, int>;
 template struct SetConstant<platform::CPUPlace, int64_t>;
 template struct SetConstant<platform::CPUPlace, bool>;
 #define DEFINE_CPU_TRANS(RANK)                                \
  template struct Transpose<platform::CPUPlace, float, RANK>; \
--- a/paddle/operators/math/math_function.cu
+++ b/paddle/operators/math/math_function.cu
@ -256,6 +256,8 @@ void axpy<platform::GPUPlace, double>(const platform::DeviceContext& context,
 template struct SetConstant<platform::GPUPlace, float>;
 template struct SetConstant<platform::GPUPlace, double>;
 template struct SetConstant<platform::GPUPlace, int>;
 template struct SetConstant<platform::GPUPlace, int64_t>;
 template struct SetConstant<platform::GPUPlace, bool>;
 #define DEFINE_GPU_TRANS(RANK)                                \
  template struct Transpose<platform::GPUPlace, float, RANK>; \
--- a/paddle/operators/sum_op.cc
+++ b/paddle/operators/sum_op.cc
@ -12,6 +12,7 @@ limitations under the License. */
 #include "paddle/operators/sum_op.h"
 #include <vector>
 #include "paddle/framework/var_type_inference.h"
 #include "paddle/operators/detail/safe_ref.h"
 namespace paddle {
 namespace operators {
@ -59,7 +60,8 @@ class SumOp : public framework::OperatorWithKernel {
              x_vars[0]->Get<framework::SelectedRows>().value().type()),
          ctx.device_context());
    } else if (x_vars[0]->IsType<framework::LoDTensorArray>()) {
-      auto& array = x_vars[0]->Get<framework::LoDTensorArray>();
+      for (auto& x_var : x_vars) {
        auto& array = x_var->Get<framework::LoDTensorArray>();
        for (auto& each : array) {
          if (each.numel() != 0) {
            return framework::OpKernelType(framework::ToDataType(each.type()),
@ -67,6 +69,8 @@ class SumOp : public framework::OperatorWithKernel {
          }
        }
      }
      PADDLE_THROW("Cannot find the input data type by all input data");
    }
    PADDLE_THROW("Unexpected branch. Input type is %s",
                 x_vars[0]->Type().name());
  }
@ -96,6 +100,11 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
    auto& inputs = op_desc.Input("X");
    auto var_type = framework::VarDesc::SELECTED_ROWS;
    for (auto& name : op_desc.Input("X")) {
      VLOG(10) << name << " "
               << block->FindRecursiveOrCreateVar(name)->GetType();
    }
    bool any_input_is_lod_tensor = std::any_of(
        inputs.begin(), inputs.end(), [block](const std::string& name) {
          return block->FindRecursiveOrCreateVar(name)->GetType() ==
@ -103,7 +112,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
        });
    auto is_tensor_array = [block](const std::string& name) {
-      return block->FindRecursiveOrCreateVar(name)->GetType() ==
+      return detail::Ref(block->FindRecursiveOrCreateVar(name)).GetType() ==
             framework::VarDesc::LOD_TENSOR_ARRAY;
    };
@ -113,14 +122,26 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
        std::all_of(inputs.begin(), inputs.end(), is_tensor_array);
    if (any_input_is_tensor_array) {
-      PADDLE_ENFORCE(all_inputs_are_tensor_array);
+      if (!all_inputs_are_tensor_array) {
        std::ostringstream os;
        for (auto& each : inputs) {
          os << "    " << each << " type is "
             << detail::Ref(block->FindRecursiveOrCreateVar(each)).GetType()
             << "\n";
        }
        PADDLE_ENFORCE(all_inputs_are_tensor_array,
                       "Not all inputs are tensor array:\n%s", os.str());
      }
      var_type = framework::VarDesc::LOD_TENSOR_ARRAY;
    } else if (any_input_is_lod_tensor) {
      var_type = framework::VarDesc::LOD_TENSOR;
    }
    auto out_var_name = op_desc.Output("Out").front();
-    block->FindRecursiveOrCreateVar(out_var_name)->SetType(var_type);
+    auto& out_var = detail::Ref(block->FindRecursiveOrCreateVar(out_var_name));
    out_var.SetType(var_type);
    auto& in_var = detail::Ref(block->FindVarRecursive(inputs.front()));
    out_var.SetDataType(in_var.GetDataType());
  }
 };
--- a/paddle/operators/tensor_array_read_write_op.cc
+++ b/paddle/operators/tensor_array_read_write_op.cc
@ -12,7 +12,7 @@
   See the License for the specific language governing permissions and
   limitations under the License. */
 #include "paddle/operators/array_operator.h"
-
+#include "paddle/operators/detail/safe_ref.h"
 namespace paddle {
 namespace operators {
@ -33,6 +33,8 @@ class WriteToArrayOp : public ArrayOp {
    auto *out =
        scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensorArray>();
    if (offset >= out->size()) {
      VLOG(10) << "Resize " << Output("Out") << " from " << out->size()
               << " to " << offset + 1;
      out->resize(offset + 1);
    }
    auto *out_tensor = &out->at(offset);
@ -85,11 +87,15 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDescBind &op_desc,
                  framework::BlockDescBind *block) const override {
-    for (auto &out_var : op_desc.OutputArgumentNames()) {
+    auto x_name = op_desc.Input("X")[0];
-      VLOG(10) << "Set Variable " << out_var << " as LOD_TENSOR_ARRAY";
+    auto out_name = op_desc.Output("Out")[0];
-      block->FindRecursiveOrCreateVar(out_var)->SetType(
+    VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
-          framework::VarDesc::LOD_TENSOR_ARRAY);
+    auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name),
-    }
+                            "Cannot found %s", out_name);
    out.SetType(framework::VarDesc::LOD_TENSOR_ARRAY);
    auto &x =
        detail::Ref(block->FindVarRecursive(x_name), "Cannot found %s", x_name);
    out.SetDataType(x.GetDataType());
  }
 };
@ -107,11 +113,11 @@ class ReadFromArrayOp : public ArrayOp {
    auto &x_array = x->Get<framework::LoDTensorArray>();
    auto *out = scope.FindVar(Output("Out"));
    PADDLE_ENFORCE(out != nullptr, "Out must be set");
-    auto *out_tesnor = out->GetMutable<framework::LoDTensor>();
+    auto *out_tensor = out->GetMutable<framework::LoDTensor>();
    size_t offset = GetOffset(scope, dev_ctx);
    PADDLE_ENFORCE_LT(offset, x_array.size());
-    out_tesnor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx);
+    out_tensor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx);
-    out_tesnor->set_lod(x_array[offset].lod());
+    out_tensor->set_lod(x_array[offset].lod());
  }
 };
--- a/paddle/operators/while_op.cc
+++ b/paddle/operators/while_op.cc
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@ -12,9 +12,9 @@ def unique_name(prefix):
    return "_".join([prefix, str(uid)])
-def _debug_string_(proto):
+def _debug_string_(proto, throw_on_error=True):
    error_fields = list()
-    if not proto.IsInitialized(error_fields):
+    if not proto.IsInitialized(error_fields) and throw_on_error:
        raise ValueError("{0} are not initialized\nThe message is {1}".format(
            error_fields, proto))
    return proto.__str__()
@ -101,9 +101,12 @@ class Variable(object):
        self.stop_gradient = stop_gradient
    def __str__(self):
        return self.to_string(True)
    def to_string(self, throw_on_error):
        protostr = self.desc.serialize_to_string()
        proto = framework_pb2.VarDesc.FromString(str(protostr))
-        return _debug_string_(proto)
+        return _debug_string_(proto, throw_on_error)
    __repr__ = __str__
@ -291,10 +294,13 @@ class Operator(object):
            self.desc.infer_var_type(self.block.desc)
            self.desc.infer_shape(self.block.desc)
-    def __str__(self):
+    def to_string(self, throw_on_error):
        protostr = self.desc.serialize_to_string()
        proto = framework_pb2.OpDesc.FromString(str(protostr))
-        return _debug_string_(proto)
+        return _debug_string_(proto, throw_on_error)
    def __str__(self):
        return self.to_string(True)
    __repr__ = __str__
@ -349,9 +355,12 @@ class Block(object):
        self.program = program
    def __str__(self):
        return self.to_string(True)
    def to_string(self, throw_on_error):
        protostr = self.desc.serialize_to_string()
        proto = framework_pb2.BlockDesc.FromString(str(protostr))
-        return _debug_string_(proto)
+        return _debug_string_(proto, throw_on_error)
    __repr__ = __str__
@ -454,9 +463,12 @@ class Program(object):
        self.current_block_idx = 0
    def __str__(self):
        return self.to_string(True)
    def to_string(self, throw_on_error):
        protostr = self.desc.serialize_to_string()
        proto = framework_pb2.ProgramDesc.FromString(str(protostr))
-        return _debug_string_(proto)
+        return _debug_string_(proto, throw_on_error)
    def clone(self):
        p = Program()
@ -512,7 +524,14 @@ class Program(object):
        assert isinstance(target, Variable)
        if no_grad_set is None:
            no_grad_set = set()
-        param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set)
+        try:
            param_to_grad_info = self.desc.append_backward(target.desc,
                                                           no_grad_set)
        except Exception as e:
            raise core.EnforceNotMet(
                str(e) + "\nCurrent protobuf is\n{0}".format(
                    self.to_string(False)))
        self.sync_with_cpp()
        return param_to_grad_info
--- a/python/paddle/v2/fluid/net_drawer.py
+++ b/python/paddle/v2/fluid/net_drawer.py
@ -66,10 +66,13 @@ def parse_graph(program, graph, var_dict, **kwargs):
            if not var_dict.has_key(var):
                var_dict[var] = "Feed"
    temp_id = 0
    proto = framework_pb2.ProgramDesc.FromString(
        program.desc.serialize_to_string())
    for block in proto.blocks:
        for op in block.ops:
            op.type = op.type + "_" + str(temp_id)
            temp_id += 1
            graph.node(**draw_node(op))
            for o in op.outputs:
                for arg in o.arguments:
@ -78,6 +81,7 @@ def parse_graph(program, graph, var_dict, **kwargs):
                for arg in e.arguments:
                    if var_dict.has_key(arg):
                        graph.edge(**draw_edge(var_dict, op, e, arg))
        break  # only plot the first block
 def draw_graph(startup_program, main_program, **kwargs):
--- a/python/paddle/v2/fluid/tests/test_while_op.py
+++ b/python/paddle/v2/fluid/tests/test_while_op.py
@ -2,6 +2,7 @@ import unittest
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.executor import Executor
 import paddle.v2.fluid.core as core
 from paddle.v2.fluid.backward import append_backward_ops
 import numpy
@ -16,7 +17,7 @@ class TestWhileOp(unittest.TestCase):
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
-        mem_array = layers.array_write(init, i=i)
+        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)
        i = layers.increment(i)
@ -29,17 +30,23 @@ class TestWhileOp(unittest.TestCase):
        i.stop_gradient = True
        array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
        array_len.stop_gradient = True
        cond = layers.less_than(x=i, y=array_len)
        while_op = layers.While(cond=cond)
        with while_op.block():
            d = layers.array_read(array=data_array, i=i)
            prev = layers.array_read(array=mem_array, i=i)
            i = layers.increment(x=i, in_place=True)
            result = layers.sums(input=[d, prev])
            i = layers.increment(x=i, in_place=True)
            layers.array_write(result, i=i, array=mem_array)
            layers.less_than(x=i, y=array_len, cond=cond)
-        sum_result = layers.array_read(mem_array, i=array_len)
+
        sum_result = layers.array_read(array=mem_array, i=i)
        loss = layers.mean(x=sum_result)
        append_backward_ops(loss)
        cpu = core.CPUPlace()
        exe = Executor(cpu)