Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into port_python3_syntax

7 years ago · a58dd3e557
parent 850c3945f2 0964de119b
commit a58dd3e557
104 changed files with 2720 additions and 710 deletions
--- a/cmake/external/anakin.cmake
+++ b/cmake/external/anakin.cmake
@ -8,6 +8,7 @@ set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header f
 set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library")

 set(ANAKIN_COMPILE_EXTRA_FLAGS 
+    -Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
    -Wno-error=unused-variable -Wno-unused-variable 
    -Wno-error=format-extra-args -Wno-format-extra-args
    -Wno-error=comment -Wno-comment 
@ -19,7 +20,7 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
    -Wno-reorder 
    -Wno-error=cpp)

-set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz")
+set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/Version0.1.0/anakin.tar.gz")

 # A helper function used in Anakin, currently, to use it, one need to recursively include
 # nearly all the header files.
@ -41,9 +42,9 @@ if (NOT EXISTS "${ANAKIN_INSTALL_DIR}")
    message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
    execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
    execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
-    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget --no-check-certificate -q ${ANAKIN_LIBRARY_URL}")
    execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
-    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin.tar.gz")
 endif()

 if (WITH_ANAKIN)
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -263,9 +263,7 @@ paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=[], varargs='ar
 paddle.fluid.layers.scatter ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.sigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.logsigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
@ -306,7 +304,9 @@ paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', '
 paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
 paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3))
 paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
+paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
+paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
 paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 200, 1))
 paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@ -7,6 +7,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3 boost)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
 nv_test(dim_test SRCS dim_test.cu DEPS ddim)
 cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
+cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
 if(WITH_GPU)
  nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
 else()
--- a/paddle/fluid/framework/data_type.cc
+++ b/paddle/fluid/framework/data_type.cc
@ -17,6 +17,8 @@
 #include <string>
 #include <unordered_map>

+using float16 = paddle::platform::float16;
+
 namespace paddle {
 namespace framework {

@ -53,7 +55,7 @@ static DataTypeMap* InitDataTypeMap() {
  RegisterType<cc_type>(retv, proto_type, #cc_type)

  // NOTE: Add your customize type here.
-  RegType(platform::float16, proto::VarType::FP16);
+  RegType(float16, proto::VarType::FP16);
  RegType(float, proto::VarType::FP32);
  RegType(double, proto::VarType::FP64);
  RegType(int, proto::VarType::INT32);
--- a/paddle/fluid/framework/data_type_test.cc
+++ b/paddle/fluid/framework/data_type_test.cc
@ -0,0 +1,40 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/framework/data_type.h"
+
+#include <string>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/tensor.h"
+
+// Verifies how proto::VarType::FP16 is resolved at runtime: the type index it
+// maps to, the element size reported for it, and its debug name.
+TEST(DataType, float16) {
+  namespace fw = paddle::framework;
+  namespace plat = paddle::platform;
+  const fw::proto::VarType::Type fp16 = fw::proto::VarType::FP16;
+
+  // Allocate on the CPU using only the runtime type index.
+  fw::Tensor tensor;
+  plat::CPUPlace cpu;
+  tensor.mutable_data(cpu, fw::ToTypeIndex(fp16));
+
+  // The tensor must report platform::float16 as its element type.
+  EXPECT_EQ(tensor.type(), std::type_index(typeid(plat::float16)));
+
+  // A float16 element occupies two bytes.
+  EXPECT_EQ(fw::SizeOfType(fw::ToTypeIndex(fp16)), 2u);
+
+  // The debug name must be "float16".
+  const std::string expected_name = "float16";
+  EXPECT_STREQ(fw::DataTypeToString(fp16).c_str(), expected_name.c_str());
+}
--- a/paddle/fluid/framework/details/exception_holder.h
+++ b/paddle/fluid/framework/details/exception_holder.h
@ -0,0 +1,83 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+// Holds at most one exception (platform::EnforceNotMet or
+// platform::EOFException) so it can be recorded in one place and rethrown in
+// another. Every public member function acquires mu_ before touching the
+// stored state.
+class ExceptionHolder {
+ public:
+  // Stores a copy of `exp`, replacing any exception that is already held.
+  void Catch(const platform::EnforceNotMet& exp) {
+    std::lock_guard<std::mutex> lock(mu_);
+    exception_.reset(new platform::EnforceNotMet(exp));
+    type_ = kEnforceNotMet;
+  }
+
+  // Stores a copy of `exp` only when nothing is held yet.
+  void Catch(const platform::EOFException& exp) {
+    std::lock_guard<std::mutex> lock(mu_);
+    // EOFException will not cover up existing EnforceNotMet.
+    if (exception_.get() == nullptr) {
+      exception_.reset(new platform::EOFException(exp));
+      type_ = kEOF;
+    }
+  }
+
+  // Returns true while an exception is held.
+  bool ExceptionCatched() const {
+    std::lock_guard<std::mutex> lock(mu_);
+    return exception_.get() != nullptr;
+  }
+
+  // Rethrows a copy of the held exception; does nothing when none is held.
+  // The holder is emptied *before* the exception leaves this function, so
+  // ExceptionCatched() is false afterwards. (Resetting after the switch, as
+  // was done previously, is never reached on the throwing paths.)
+  void Throw() {
+    std::lock_guard<std::mutex> lock(mu_);
+    switch (type_) {
+      case kNone:
+        break;
+      case kEnforceNotMet: {
+        // Copy first: ResetUnlocked() destroys the stored object.
+        auto e = *static_cast<platform::EnforceNotMet*>(exception_.get());
+        ResetUnlocked();
+        throw e;
+      }
+      case kEOF: {
+        auto e = *static_cast<platform::EOFException*>(exception_.get());
+        ResetUnlocked();
+        throw e;
+      }
+      default:
+        LOG(FATAL) << "Unknown exception.";
+    }
+    ResetUnlocked();
+  }
+
+  // Drops the held exception, if any.
+  void Clear() {
+    std::lock_guard<std::mutex> lock(mu_);
+    ResetUnlocked();
+  }
+
+ private:
+  // Returns the holder to the empty state. The caller must already hold mu_.
+  void ResetUnlocked() {
+    exception_.reset();
+    type_ = kNone;
+  }
+
+  // Tag recording the dynamic type of *exception_, so Throw() can rethrow it
+  // with the right static type.
+  enum ExceptionType { kNone, kEnforceNotMet, kEOF };
+  ExceptionType type_{kNone};
+
+  std::unique_ptr<std::exception> exception_;
+  // mutable so that the const query ExceptionCatched() can lock it.
+  mutable std::mutex mu_;
+};
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
@ -41,7 +41,9 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
      std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
      std::unique_ptr<SSAGraphExecutor>&& underlying_executor);

-  const ir::Graph& Graph() const { return underlying_executor_->Graph(); }
+  const ir::Graph& Graph() const override {
+    return underlying_executor_->Graph();
+  }

  FeedFetchList Run(const std::vector<std::string>& fetch_tensors) override;

--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@ -83,7 +83,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(

  // Clean run context
  run_op_futures_.clear();
-  exception_.reset();
+  exception_holder_.Clear();

  // Step 3. Execution
  while (!pending_vars.empty()) {
@ -103,23 +103,11 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
    auto cur_ready_vars = ready_vars.PopAll(1, &timeout);

    if (timeout) {
-      std::unique_lock<std::mutex> l(exception_mu_);
-      if (exception_) {
-        l.unlock();
+      if (exception_holder_.ExceptionCatched()) {
        for (auto &run_op_future : run_op_futures_) {
          run_op_future.wait();
        }
-        l.lock();
-        std::exception *exp = exception_.get();
-        if (dynamic_cast<platform::EOFException *>(exp)) {
-          auto e = *static_cast<platform::EOFException *>(exp);
-          throw e;
-        } else if (dynamic_cast<platform::EnforceNotMet *>(exp)) {
-          auto e = *static_cast<platform::EnforceNotMet *>(exp);
-          throw e;
-        } else {
-          LOG(FATAL) << "Unknown exception.";
-        }
+        exception_holder_.Throw();
      } else {
        continue;
      }
@ -229,14 +217,9 @@ void ThreadedSSAGraphExecutor::RunOp(
      ready_var_q->Extend(op->Outputs());
      VLOG(10) << op << " " << op->Name() << "Signal posted";
    } catch (platform::EOFException ex) {
-      std::lock_guard<std::mutex> l(exception_mu_);
-      // EOFException will not cover up existing EnforceNotMet.
-      if (exception_.get() == nullptr) {
-        exception_.reset(new platform::EOFException(ex));
-      }
+      exception_holder_.Catch(ex);
    } catch (platform::EnforceNotMet ex) {
-      std::lock_guard<std::mutex> l(exception_mu_);
-      exception_.reset(new platform::EnforceNotMet(ex));
+      exception_holder_.Catch(ex);
    } catch (...) {
      LOG(FATAL) << "Unknown exception catched";
    }
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
@ -24,6 +24,7 @@
 #include <functional>
 #include "ThreadPool.h"  // ThreadPool in thrird party
 #include "paddle/fluid/framework/blocking_queue.h"
+#include "paddle/fluid/framework/details/exception_holder.h"
 #include "paddle/fluid/framework/details/execution_strategy.h"
 #include "paddle/fluid/framework/details/fetch_op_handle.h"
 #include "paddle/fluid/framework/details/ssa_graph_executor.h"
@ -42,7 +43,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
                           const std::vector<platform::Place> &places,
                           std::unique_ptr<ir::Graph> &&graph);

-  const ir::Graph &Graph() const { return *graph_; }
+  const ir::Graph &Graph() const override { return *graph_; }
  // Run a SSAGraph by a thread pool
  // Use topological sort algorithm
  FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
@ -59,8 +60,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
  std::vector<Scope *> local_scopes_;
  std::vector<platform::Place> places_;
  platform::DeviceContextPool fetch_ctxs_;
-  std::mutex exception_mu_;
-  std::unique_ptr<std::exception> exception_;
+  ExceptionHolder exception_holder_;
  std::atomic<int> running_ops_;

  void InsertPendingOp(std::unordered_map<OpHandleBase *, size_t> *pending_ops,
--- a/paddle/fluid/framework/ir/graph_helper_test.cc
+++ b/paddle/fluid/framework/ir/graph_helper_test.cc
@ -116,8 +116,8 @@ TEST(GraphHelperTest, Basic) {
  for (size_t i = 0; i < sorted.size(); ++i) {
    node_map[sorted[i]->Name()] = i;
  }
-  ASSERT_EQ(node_map.at("op1"), 0);
-  ASSERT_EQ(node_map.at("op2"), 1);
+  ASSERT_EQ(node_map.at("op1"), 0UL);
+  ASSERT_EQ(node_map.at("op2"), 1UL);
  ASSERT_TRUE(node_map.at("op3") < node_map.at("op5"));
 }
 }  // namespace ir
--- a/paddle/fluid/framework/ir/graph_test.cc
+++ b/paddle/fluid/framework/ir/graph_test.cc
@ -97,15 +97,15 @@ TEST(GraphTest, Basic) {
  std::vector<ir::Node *> nodes(g->Nodes().begin(), g->Nodes().end());
  for (ir::Node *n : nodes) {
    if (n->Name() == "sum") {
-      ASSERT_EQ(n->inputs.size(), 3);
-      ASSERT_EQ(n->outputs.size(), 1);
+      ASSERT_EQ(n->inputs.size(), 3UL);
+      ASSERT_EQ(n->outputs.size(), 1UL);
    } else if (n->Name() == "test_a" || n->Name() == "test_b" ||
               n->Name() == "test_c") {
-      ASSERT_EQ(n->inputs.size(), 0);
-      ASSERT_EQ(n->outputs.size(), 1);
+      ASSERT_EQ(n->inputs.size(), 0UL);
+      ASSERT_EQ(n->outputs.size(), 1UL);
    } else if (n->Name() == "test_out") {
-      ASSERT_EQ(n->inputs.size(), 1);
-      ASSERT_EQ(n->outputs.size(), 0);
+      ASSERT_EQ(n->inputs.size(), 1UL);
+      ASSERT_EQ(n->outputs.size(), 0UL);
    }
  }
  ASSERT_EQ(nodes.size(), 5);
--- a/paddle/fluid/framework/op_kernel_type_test.cc
+++ b/paddle/fluid/framework/op_kernel_type_test.cc
@ -29,6 +29,13 @@ TEST(OpKernelType, ToString) {
  ASSERT_EQ(paddle::framework::KernelTypeToString(op_kernel_type),
            "data_type[float]:data_layout[NCHW]:place[CPUPlace]:library_type["
            "CUDNN]");
+
+  using CUDAPlace = paddle::platform::CUDAPlace;
+  OpKernelType op_kernel_type2(DataType::FP16, CUDAPlace(0), DataLayout::kNCHW,
+                               LibraryType::kCUDNN);
+  ASSERT_EQ(paddle::framework::KernelTypeToString(op_kernel_type2),
+            "data_type[float16]:data_layout[NCHW]:place[CUDAPlace(0)]:library_"
+            "type[CUDNN]");
 }

 TEST(OpKernelType, Hash) {
--- a/paddle/fluid/framework/op_proto_maker.cc
+++ b/paddle/fluid/framework/op_proto_maker.cc
@ -40,6 +40,40 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
  return OpProtoAndCheckerMaker::VariableBuilder{output};
 }

+// Sets the `reuse` field of output `name` to `reused_name`.
+// Enforces that `reused_name` is a declared input, that `name` is a declared
+// output, and that the output has no `reuse` value yet.
+void OpProtoAndCheckerMaker::Reuse(const std::string& name,
+                                   const std::string& reused_name) {
+  // Step 1: the reused variable has to be among the inputs.
+  bool input_exists = false;
+  for (auto& input : proto_->inputs()) {
+    if (input.name() == reused_name) {
+      input_exists = true;
+      break;
+    }
+  }
+  PADDLE_ENFORCE(input_exists,
+                 "Input/Output name: %s reused_name: %s, one of them is not "
+                 "exists or not matched.",
+                 name, reused_name);
+
+  // Step 2: find the output and tag it, refusing to overwrite an earlier tag.
+  bool output_exists = false;
+  const int output_count = proto_->outputs().size();
+  for (int idx = 0; idx < output_count && !output_exists; ++idx) {
+    proto::OpProto::Var* output = proto_->mutable_outputs()->Mutable(idx);
+    if (output->name() != name) {
+      continue;
+    }
+    PADDLE_ENFORCE(!output->has_reuse(),
+                   "Output(%s) has been set reused var of %s", name,
+                   output->reuse());
+    output->set_reuse(reused_name);
+    output_exists = true;
+  }
+  PADDLE_ENFORCE(output_exists,
+                 "Input/Output name: %s reused_name: %s, one of them is not "
+                 "exists or not matched.",
+                 name, reused_name);
+}
+
 void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
  std::unordered_set<std::string> names;
  auto checker = [&](const std::string& name) {
--- a/paddle/fluid/framework/op_proto_maker.h
+++ b/paddle/fluid/framework/op_proto_maker.h
@ -78,6 +78,8 @@ class OpProtoAndCheckerMaker {
  VariableBuilder AddOutput(const std::string &name,
                            const std::string &comment);

+  void Reuse(const std::string &name, const std::string &reused_name);
+
  template <typename T>
  TypedAttrChecker<T> &AddAttr(const std::string &name,
                               const std::string &comment,
--- a/paddle/fluid/framework/op_proto_maker_test.cc
+++ b/paddle/fluid/framework/op_proto_maker_test.cc
@ -49,6 +49,15 @@ TEST(ProtoMaker, DuplicatedInOut) {
 }

 class TestInplaceProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() {
+    AddInput("X", "input of test op");
+    AddOutput("XOut", "output of test op").Reuse("X");
+  }
+};
+
+class TestInplaceProtoMaker2
+    : public paddle::framework::OpProtoAndCheckerMaker {
 public:
  void Make() {
    AddInput("X", "input of test op");
@ -58,12 +67,100 @@ class TestInplaceProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
 };

 TEST(ProtoMaker, InplaceOutput) {
-  paddle::framework::proto::OpProto op_proto;
+  paddle::framework::proto::OpProto op_proto, op_proto2;
  paddle::framework::OpAttrChecker op_checker;
  TestInplaceProtoMaker proto_maker;
-  ASSERT_THROW(proto_maker(&op_proto, &op_checker),
+  TestInplaceProtoMaker2 proto_maker2;
+
+  proto_maker(&op_proto, &op_checker);
+
+  ASSERT_THROW(proto_maker2(&op_proto2, &op_checker),
               paddle::platform::EnforceNotMet);
-  // proto_maker(&op_proto, &op_checker);
-  // proto_maker.Make();
-  // ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
 }
+
+// normal reuse
+// Base fixture for the Reuse tests: Make() declares inputs "X" and "Y" and
+// outputs "Out" and "XOut", then calls the TestReuse() hook. The hook is a
+// no-op here; subclasses override it to issue Reuse() calls.
+class TestReuseProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() {
+    AddInput("X", "input of test op");
+    AddInput("Y", "input of test op");
+    AddOutput("Out", "output of test op");
+    AddOutput("XOut", "output of test op");
+    // avoid destructor exception.
+    // Validate();
+    // Runs after all inputs/outputs are declared so that Reuse() can look
+    // them up by name.
+    TestReuse();
+  }
+
+  // Extension point for subclasses; intentionally empty in the base class.
+  virtual void TestReuse() {}
+};
+
+// Duplicate reuse: output "Out" is tagged twice, first with "X" and then
+// with "Y".
+class TestReuseProtoMaker2 : public TestReuseProtoMaker {
+ public:
+  void TestReuse() override {
+    Reuse("Out", "X");
+    Reuse("Out", "Y");
+  }
+};
+
+// Missing input: the first Reuse() names an input ("NotExists") that Make()
+// never declared.
+class TestReuseProtoMaker3 : public TestReuseProtoMaker {
+ public:
+  void TestReuse() override {
+    Reuse("Out", "NotExists");
+    Reuse("XOut", "X");
+  }
+};
+
+// Missing output: Reuse() names an output ("NotExists") that Make() never
+// declared.
+class TestReuseProtoMaker4 : public TestReuseProtoMaker {
+ public:
+  void TestReuse() override {
+    Reuse("NotExists", "X");
+  }
+};
+
+// Runs the base maker, whose TestReuse() hook issues no Reuse() calls, and
+// expects the call to complete.
+TEST(ProtoMaker, Reuse) {
+  namespace fw = paddle::framework;
+  fw::proto::OpProto proto;
+  fw::OpAttrChecker checker;
+  TestReuseProtoMaker maker;
+  maker(&proto, &checker);
+}
+
+// NOTE(dzhwinter):
+// There is a Fatal CHECK on base class destructor, which will call abort inside
+// instead of
+// throw an exception. If we throw an exception in Make(), we will trigger the
+// CHECK and terminate the tests.
+//
+// I tried to replace the default CHECK with an exception; however, that is
+// still not supported by glog.
+// the details:
+// https://github.com/google/glog/issues/249
+// https://github.com/facebookresearch/TensorComprehensions/issues/351
+/*
+TEST(ProtoMaker, ReuseWithException) {
+  paddle::framework::proto::OpProto op_proto2, op_proto3, op_proto4;
+  paddle::framework::OpAttrChecker op_checker;
+  TestReuseProtoMaker2 proto_maker2;
+  TestReuseProtoMaker3 proto_maker3;
+  TestReuseProtoMaker4 proto_maker4;
+  EXPECT_THROW(proto_maker2(&op_proto2, &op_checker),
+               paddle::platform::EnforceNotMet);
+
+  EXPECT_THROW(proto_maker3(&op_proto3, &op_checker),
+               paddle::platform::EnforceNotMet);
+
+  EXPECT_THROW(proto_maker4(&op_proto4, &op_checker),
+               paddle::platform::EnforceNotMet);
+}
+
+void FailureFunction() {
+  throw std::runtime_error("Check failed in destructor.");
+  // return 0;
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  google::InstallFailureFunction(&FailureFunction);
+  return RUN_ALL_TESTS();
+}
+*/
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@ -18,6 +18,7 @@ limitations under the License. */

 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/var_type.h"
@ -57,7 +58,11 @@ static DDim GetDims(const Scope& scope, const std::string& name,
  }

  if (var->IsType<LoDTensor>()) {
-    return var->Get<LoDTensor>().dims();
+    const LoDTensor& tensor = var->Get<LoDTensor>();
+    if (UNLIKELY(!tensor.IsInitialized())) {
+      return DDim({-1});
+    }
+    return tensor.dims();
  } else if (var->IsType<SelectedRows>()) {
    if (get_actual_dim) {
      return var->Get<SelectedRows>().value().dims();
@ -69,6 +74,26 @@ static DDim GetDims(const Scope& scope, const std::string& name,
  }
 }

+// Returns the data type name of variable `name` in `scope`, for use in debug
+// strings. Returns "" when the variable is not found, is neither a LoDTensor
+// nor a SelectedRows, or its tensor is not initialized yet.
+static std::string GetDtype(const Scope& scope, const std::string& name) {
+  Variable* var = scope.FindVar(name);
+  if (var == nullptr) {
+    return "";
+  }
+
+  if (var->IsType<LoDTensor>()) {
+    const LoDTensor& tensor = var->Get<LoDTensor>();
+    if (UNLIKELY(!tensor.IsInitialized())) {
+      return "";
+    }
+    return DataTypeToString(ToDataType(tensor.type()));
+  } else if (var->IsType<SelectedRows>()) {
+    const auto& value = var->Get<SelectedRows>().value();
+    // Guard exactly like the LoDTensor branch: querying type() on a tensor
+    // that has not been initialized is presumably an error (TODO confirm in
+    // Tensor::type()), and a debug string must not fail.
+    if (UNLIKELY(!value.IsInitialized())) {
+      return "";
+    }
+    return DataTypeToString(ToDataType(value.type()));
+  } else {
+    return "";
+  }
+}
+
 static int GetRowSize(const Scope& scope, const std::string& name) {
  Variable* var = scope.FindVar(name);
  if (var == nullptr) {
@ -91,7 +116,11 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
  }

  if (var->IsType<LoDTensor>()) {
-    return var->Get<LoDTensor>().lod();
+    const LoDTensor& tensor = var->Get<LoDTensor>();
+    if (UNLIKELY(!tensor.IsInitialized())) {
+      return default_lod;
+    }
+    return tensor.lod();
  } else {
    return default_lod;
  }
@ -172,6 +201,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
        if (row_size >= 0) {
          ss << "[row_size=" << row_size << "]";
        }
+        std::string dtype = GetDtype(*scope, input.second[i]);
+        ss << ":" << dtype;
        ss << "[" << GetDims(*scope, input.second[i], true) << "]";
        ss << "(" << GetLoD(*scope, input.second[i]) << ")";
      }
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@ -82,7 +82,7 @@ class Tensor {
  template <typename T>
  const T* data() const;

-  bool IsInitialized() const;
+  inline bool IsInitialized() const;

  /**
   * @brief   Return a pointer to mutable memory block.
--- a/paddle/fluid/framework/tensor_test.cc
+++ b/paddle/fluid/framework/tensor_test.cc
@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/tensor.h"
 #include <gtest/gtest.h>
 #include <string>
+#include "paddle/fluid/platform/float16.h"

 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
@ -213,3 +214,17 @@ TEST(Tensor, Layout) {
  src.set_layout(framework::DataLayout::kAnyLayout);
  ASSERT_EQ(src.layout(), framework::DataLayout::kAnyLayout);
 }
+
+// Allocates a 2x3 float16 tensor on the CPU, fills it with 0..5, and checks
+// the reported memory size.
+TEST(Tensor, FP16) {
+  using platform::float16;
+  const int element_count = 2 * 3;
+
+  framework::Tensor src;
+  float16* src_ptr = src.mutable_data<float16>({2, 3}, platform::CPUPlace());
+  int idx = 0;
+  while (idx < element_count) {
+    src_ptr[idx] = static_cast<float16>(idx);
+    ++idx;
+  }
+  EXPECT_EQ(src.memory_size(), element_count * sizeof(float16));
+  // EXPECT a human readable error message
+  // src.data<uint8_t>();
+  // Tensor holds the wrong type, it holds N6paddle8platform7float16E at
+  // [/paddle/Paddle/paddle/fluid/framework/tensor_impl.h:43]
+}
--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@ -6,9 +6,11 @@ cc_library(analysis SRCS pass_manager.cc dot.cc node.cc data_flow_graph.cc graph
  tensorrt_subgraph_node_mark_pass.cc
  analyzer.cc
  helper.cc
+        model_store_pass.cc
  DEPS framework_proto proto_desc)
 cc_test(test_node SRCS node_tester.cc DEPS analysis)
 cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
+cc_binary(inference_analyzer SRCS analyzer_main.cc DEPS analysis)

 set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)

@ -40,3 +42,4 @@ inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_
 inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc)
 inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc)
 inference_analysis_test(test_analyzer SRCS analyzer_tester.cc)
+inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@ -17,6 +17,7 @@
 #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
 #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
 #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
+#include "paddle/fluid/inference/analysis/model_store_pass.h"
 #include "paddle/fluid/inference/analysis/pass_manager.h"
 #include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
 #include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
@ -29,6 +30,9 @@ DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false,
 DEFINE_string(inference_analysis_graphviz_log_root, "./",
              "Graphviz debuger for data flow graphs.");

+DEFINE_string(inference_analysis_output_storage_path, "",
+              "optimized model output path");
+
 namespace inference {
 namespace analysis {

@ -47,6 +51,9 @@ class DfgPassManagerImpl final : public DfgPassManager {
      AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller));
    }
    AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass);
+    if (!FLAGS_inference_analysis_output_storage_path.empty()) {
+      AddPass("model-store-pass", new ModelStorePass);
+    }
  }

  std::string repr() const override { return "dfg-pass-manager"; }
--- a/paddle/fluid/inference/analysis/analyzer.h
+++ b/paddle/fluid/inference/analysis/analyzer.h
@ -16,28 +16,23 @@ limitations under the License. */

 /*
 * This file contains Analyzer, an class that exposed as a library that analyze
- * and optimize
- * Fluid ProgramDesc for inference. Similar to LLVM, it has multiple flags to
- * control whether
- * an process is applied on the program.
+ * and optimize Fluid ProgramDesc for inference. Similar to LLVM, it has
+ * multiple flags to
+ * control whether a process is applied on the program.
 *
 * The processes are called Passes in analysis, the Passes are placed in a
- * pipeline, the first
- * Pass is the FluidToDataFlowGraphPass which transforms a Fluid ProgramDesc to
- * a data flow
- * graph, the last Pass is DataFlowGraphToFluidPass which transforms a data flow
- * graph to a
- * Fluid ProgramDesc. The passes in the middle of the pipeline can be any Passes
- * which take a
- * node or data flow graph as input.
+ * pipeline, the first Pass is the FluidToDataFlowGraphPass which transforms a
+ * Fluid ProgramDesc to
+ * a data flow graph, the last Pass is DataFlowGraphToFluidPass which transforms
+ * a data flow graph to a Fluid ProgramDesc. The passes in the middle of the
+ * pipeline can be any Passes
+ * which take a node or data flow graph as input.
 *
 * The Analyzer can be used in two methods, the first is a executable file which
- * can be used to
- * pre-process the inference model and can be controlled by passing difference
- * command flags;
+ * can be used to pre-process the inference model and can be controlled by
+ * passing different command flags;
 * the other way is to compose inside the inference API as a runtime pre-process
- * phase in the
- * inference service.
+ * phase in the inference service.
 */

 #include <gflags/gflags.h>
@ -50,6 +45,7 @@ namespace paddle {
 // flag if not available.
 DECLARE_bool(inference_analysis_enable_tensorrt_subgraph_engine);
 DECLARE_string(inference_analysis_graphviz_log_root);
+DECLARE_string(inference_analysis_output_storage_path);

 namespace inference {
 namespace analysis {
--- a/paddle/fluid/inference/analysis/analyzer_main.cc
+++ b/paddle/fluid/inference/analysis/analyzer_main.cc
@ -0,0 +1,33 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * This file implements the analyzer -- an executable helper to analyze and
+ * optimize a trained model.
+ */
+#include "paddle/fluid/inference/analysis/analyzer.h"
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+// Entry point of the analyzer executable: parses the gflags command line,
+// then runs an Analyzer over a default-constructed Argument.
+int main(int argc, char** argv) {
+  namespace analysis = paddle::inference::analysis;
+
+  // `true` asks gflags to strip the flags it recognizes from argv.
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  analysis::Argument argument;
+  analysis::Analyzer analyzer;
+  analyzer.Run(&argument);
+
+  return 0;
+}
--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@ -20,14 +20,18 @@ namespace paddle {
 namespace inference {
 namespace analysis {

-TEST_F(DFG_Tester, analysis_without_tensorrt) {
+TEST(Analyzer, analysis_without_tensorrt) {
  FLAGS_inference_analysis_enable_tensorrt_subgraph_engine = false;
+  Argument argument;
+  argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir));
  Analyzer analyser;
  analyser.Run(&argument);
 }

-TEST_F(DFG_Tester, analysis_with_tensorrt) {
+TEST(Analyzer, analysis_with_tensorrt) {
  FLAGS_inference_analysis_enable_tensorrt_subgraph_engine = true;
+  Argument argument;
+  argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir));
  Analyzer analyser;
  analyser.Run(&argument);
 }
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@ -23,6 +23,7 @@

 #pragma once

+#include <string>
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/inference/analysis/data_flow_graph.h"

@ -36,6 +37,16 @@ namespace analysis {
 * All the fields should be registered here for clearness.
 */
 struct Argument {
+  Argument() = default;
+  explicit Argument(const std::string& fluid_model_dir)
+      : fluid_model_dir(new std::string(fluid_model_dir)) {}
+  // The directory of the trained model.
+  std::unique_ptr<std::string> fluid_model_dir;
+  // The path of `__model__` and `param`, this is used when the file name of
+  // model and param is changed.
+  std::unique_ptr<std::string> fluid_model_program_path;
+  std::unique_ptr<std::string> fluid_model_param_path;
+
  // The graph that process by the Passes or PassManagers.
  std::unique_ptr<DataFlowGraph> main_dfg;

@ -44,6 +55,9 @@ struct Argument {

  // The processed program desc.
  std::unique_ptr<framework::proto::ProgramDesc> transformed_program_desc;
+
+  // The output storage path of ModelStorePass.
+  std::unique_ptr<std::string> model_output_store_path;
 };

 #define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
--- a/paddle/fluid/inference/analysis/data_flow_graph.h
+++ b/paddle/fluid/inference/analysis/data_flow_graph.h
@ -36,6 +36,8 @@ namespace analysis {

 /*
 * DataFlowGraph - A container of Value and Function Nodes.
+ *
+ * This is the base graph for any other type of graphs, such as SSA or CFG.
 */
 struct DataFlowGraph {
  NodeMap nodes;
@ -174,7 +176,7 @@ struct GraphTraits<DataFlowGraph> {
 // sub-graph is the inputs nodes and output nodes that doesn't inside the
 // sub-graph.
 std::pair<std::vector<Node *>, std::vector<Node *>>
-ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);
+ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);  // NOLINT

 }  // namespace analysis
 }  // namespace inference
--- a/Show More
+++ b/Show More