Merge branch 'feature/check_nan_executor' into feature/rnn_gradient_check

7 years ago · 6f5e64af17
parent 8728885031 5162c41a92
commit 6f5e64af17
46 changed files with 882 additions and 435 deletions
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@ -21,6 +21,8 @@ cc_test(variable_test SRCS variable_test.cc)
 cc_library(scope SRCS scope.cc DEPS glog)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)

+cc_library(data_transform SRCS data_transform.cc DEPS tensor framework_proto)
+cc_test(data_transform_test SRCS data_transform_test.cc DEPS data_transform device_context)

 cc_library(attribute SRCS attribute.cc DEPS framework_proto)
 cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc
@ -29,7 +31,8 @@ cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
 cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
 cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute)
-cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference)
+cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
+    shape_inference data_transform)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)

@ -64,4 +67,4 @@ cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
 cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece)
 cc_test(init_test SRCS init_test.cc DEPS init)

-cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context)
+cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto)
--- a/paddle/framework/data_transform.cc
+++ b/paddle/framework/data_transform.cc
@ -0,0 +1,26 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/framework/data_transform.h"
+
+namespace paddle {
+namespace framework {
+
+// Accessor for the single shared registry of data-transform functions.
+// The registry is a function-local static, so it is constructed the first
+// time Instance() is called and the same object is returned afterwards.
+DataTransformFnMap& DataTransformFnMap::Instance() {
+  static DataTransformFnMap registry;
+  return registry;
+}
+
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/framework/data_transform.h
+++ b/paddle/framework/data_transform.h
@ -0,0 +1,109 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <functional>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "paddle/framework/op_kernel_type.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/framework/variable.h"
+#include "paddle/platform/device_context.h"
+#include "paddle/platform/macros.h"
+
+namespace paddle {
+namespace framework {
+
+// Signature of a data-transform callback: it is handed a list of device
+// contexts, a read-only source variable `in` and a pointer to the variable
+// `out` (presumably where the transformed data is written -- the callbacks
+// visible in this change do not touch it).
+using DataTransformFN =
+    std::function<void(const std::vector<platform::DeviceContext*> ctx,
+                       const Variable& in, Variable* out)>;
+// Lookup key for a transform: `first` is the kernel type to convert from,
+// `second` the kernel type to convert to.
+using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
+
+// Hash functor for KernelTypePair so it can be used as an unordered_map key.
+struct KernelTypePairHash {
+  // Mixes the hash of `t` (computed with OpKernelType::Hash) into *seed.
+  // The mixing formula is the one used by boost::hash_combine.
+  static void HashCombine(const OpKernelType& t, std::size_t* seed) {
+    OpKernelType::Hash kernel_type_hasher;
+    const std::size_t previous = *seed;
+    *seed = previous ^ (kernel_type_hasher(t) + 0x9e3779b9 + (previous << 6) +
+                        (previous >> 2));
+  }
+
+  // Hash of the pair: start from 0 and fold in `first`, then `second`, so the
+  // result depends on the order of the two kernel types.
+  size_t operator()(const KernelTypePair& kernel_pair) const {
+    std::size_t combined = 0;
+    HashCombine(kernel_pair.first, &combined);
+    HashCombine(kernel_pair.second, &combined);
+    return combined;
+  }
+};
+
+// Storage type of the registry: maps a KernelTypePair to its transform
+// function, hashed with KernelTypePairHash.
+using DataTransformMap =
+    std::unordered_map<KernelTypePair, DataTransformFN, KernelTypePairHash>;
+
+// Registry that maps a (from, to) pair of kernel types to the function that
+// transforms data between them. The constructor is private; the only way to
+// obtain an object is Instance().
+class DataTransformFnMap {
+ public:
+  // Returns the shared registry object.
+  static DataTransformFnMap& Instance();
+
+  // True when a transform is registered for `key_pair`.
+  bool Has(const KernelTypePair& key_pair) const {
+    return map_.count(key_pair) != 0;
+  }
+
+  // Convenience overload: registers `data_tranform_fn` for (left, right).
+  void Insert(const OpKernelType& left, const OpKernelType& right,
+              const DataTransformFN& data_tranform_fn) {
+    const KernelTypePair kernel_type_pair(left, right);
+    Insert(kernel_type_pair, data_tranform_fn);
+  }
+
+  // Registers `data_tranform_fn` for `kernel_type_pair`. Registering the same
+  // pair twice fails the PADDLE_ENFORCE check.
+  // NOTE(review): the message's %s is always given an empty string, so the
+  // offending pair is not actually printed.
+  void Insert(const KernelTypePair& kernel_type_pair,
+              const DataTransformFN& data_tranform_fn) {
+    PADDLE_ENFORCE(!Has(kernel_type_pair),
+                   "KernelTypePair %s has been registered", "");
+    map_.emplace(kernel_type_pair, data_tranform_fn);
+  }
+
+  // Returns the transform registered for `key_pair`; fails the
+  // PADDLE_ENFORCE_NOT_NULL check when none is registered.
+  const DataTransformFN& Get(const KernelTypePair& key_pair) const {
+    const DataTransformFN* found = GetNullable(key_pair);
+    PADDLE_ENFORCE_NOT_NULL(found, "DataTransformFN should not be NULL");
+    return *found;
+  }
+
+  // Returns a pointer to the transform registered for `key_pair`, or nullptr
+  // when there is none.
+  const DataTransformFN* GetNullable(const KernelTypePair& key_pair) const {
+    const auto entry = map_.find(key_pair);
+    return entry == map_.end() ? nullptr : &(entry->second);
+  }
+
+  // Read-only view of every registered transform.
+  const DataTransformMap& Map() const { return map_; }
+
+ private:
+  DataTransformFnMap() = default;
+  DataTransformMap map_;
+  DISABLE_COPY_AND_ASSIGN(DataTransformFnMap);
+};
+
+// generate unique name with __LINE__
+// refs https://stackoverflow.com/questions/1597007
+// TOKENPASTE2 adds one level of macro indirection so that __LINE__ is
+// expanded to its numeric value before TOKENPASTE pastes the tokens together.
+#define TOKENPASTE(x, y) x##y
+#define TOKENPASTE2(x, y) TOKENPASTE(x, y)
+// Registers `fn` as the transform from kernel type `from` to kernel type `to`.
+// Expands to a static function fn_<line> that calls
+// DataTransformFnMap::Instance().Insert(from, to, fn), plus a static int
+// var_<line> that is initialized by calling that function, so the
+// registration happens when the variable is initialized. The generated names
+// depend only on __LINE__, so two uses on the same source line of one
+// translation unit would clash. The invocation must be followed by `;`.
+#define REGISTER_DATA_TRANSFORM_FN(from, to, fn)                              \
+  static int TOKENPASTE2(fn_, __LINE__)() {                                   \
+    ::paddle::framework::DataTransformFnMap::Instance().Insert(from, to, fn); \
+    return 0;                                                                 \
+  }                                                                           \
+  static int TOKENPASTE2(var_, __LINE__) __attribute__((unused)) =            \
+      TOKENPASTE2(fn_, __LINE__)()
+
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/framework/data_transform_test.cc
+++ b/paddle/framework/data_transform_test.cc
@ -0,0 +1,78 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/framework/data_transform.h"
+#include <gtest/gtest.h>
+
+namespace paddle {
+namespace framework {
+
+using namespace platform;
+
+// Counter modified by the fake transform functions in this file, so the test
+// can tell which one was invoked.
+int test_value = 0;
+
+// Three kernel types used as registry keys. They share layout (kNCHW) and
+// library (kCUDNN) and differ in place and/or data type:
+//   kernel_type_1: FP32 on CPUPlace
+//   kernel_type_2: FP32 on CUDAPlace(0)
+//   kernel_type_3: FP16 on CUDAPlace(0)
+OpKernelType kernel_type_1(proto::DataType::FP32, CPUPlace(), DataLayout::kNCHW,
+                           LibraryType::kCUDNN);
+OpKernelType kernel_type_2(proto::DataType::FP32, CUDAPlace(0),
+                           DataLayout::kNCHW, LibraryType::kCUDNN);
+OpKernelType kernel_type_3(proto::DataType::FP16, CUDAPlace(0),
+                           DataLayout::kNCHW, LibraryType::kCUDNN);
+
+// Fake transform: ignores its arguments and adds 1 to test_value.
+void type1_to_type2(std::vector<platform::DeviceContext*> ctx,
+                    const Variable& in, Variable* out) {
+  test_value += 1;
+}
+
+// Fake transform: ignores its arguments and subtracts 1 from test_value.
+void type2_to_type3(std::vector<platform::DeviceContext*> ctx,
+                    const Variable& in, Variable* out) {
+  test_value -= 1;
+}
+
+// Fake transform: ignores its arguments and adds 2 to test_value.
+void type1_to_type3(std::vector<platform::DeviceContext*> ctx,
+                    const Variable& in, Variable* out) {
+  test_value = test_value + 2;
+}
+
+}  // namespace framework
+}  // namespace paddle
+
+namespace frw = paddle::framework;
+
+// Register one fake transform per (from, to) pair. Each invocation starts on
+// its own source line, which the macro needs to generate distinct names.
+REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_1, frw::kernel_type_2,
+                           frw::type1_to_type2);
+REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_2, frw::kernel_type_3,
+                           frw::type2_to_type3);
+REGISTER_DATA_TRANSFORM_FN(frw::kernel_type_1, frw::kernel_type_3,
+                           frw::type1_to_type3);
+
+// Checks that the three registrations above landed in the registry and that
+// looking up each pair returns the matching function, observed through the
+// running value of test_value.
+TEST(DataTransform, Register) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+
+  auto& registry = DataTransformFnMap::Instance();
+  ASSERT_EQ(registry.Map().size(), 3UL);
+
+  std::vector<DeviceContext*> ctx;
+  paddle::framework::Variable in;
+  paddle::framework::Variable out;
+
+  // type1 -> type2 adds 1: 0 becomes 1.
+  const auto key_1_to_2 =
+      std::make_pair(frw::kernel_type_1, frw::kernel_type_2);
+  registry.Get(key_1_to_2)(ctx, in, &out);
+  ASSERT_EQ(test_value, 1);
+
+  // type2 -> type3 subtracts 1: 1 becomes 0.
+  const auto key_2_to_3 =
+      std::make_pair(frw::kernel_type_2, frw::kernel_type_3);
+  registry.Get(key_2_to_3)(ctx, in, &out);
+  ASSERT_EQ(test_value, 0);
+
+  // type1 -> type3 adds 2: 0 becomes 2.
+  const auto key_1_to_3 =
+      std::make_pair(frw::kernel_type_1, frw::kernel_type_3);
+  registry.Get(key_1_to_3)(ctx, in, &out);
+  ASSERT_EQ(test_value, 2);
+}
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@ -14,18 +14,17 @@ limitations under the License. */

 #include "paddle/framework/executor.h"

-#include <algorithm>
-#include <iostream>
-#include <memory>
 #include <set>
-#include <vector>

+#include "gflags/gflags.h"
 #include "paddle/framework/feed_fetch_type.h"
 #include "paddle/framework/lod_rank_table.h"
-#include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/lod_tensor_array.h"
 #include "paddle/framework/op_registry.h"
-#include "paddle/framework/scope.h"
+
+// Command-line flag, false by default. When it is set, Executor::Run checks
+// the LoDTensor outputs of every operator for NaN/Inf right after the
+// operator has run.
+DEFINE_bool(check_nan_inf, false,
+            "Checking whether operator produce NAN/INF or not. It will be "
+            "extremely slow so please use this flag wisely.");

 namespace paddle {
 namespace framework {
@ -58,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
  }
 }

+// Fails a PADDLE_ENFORCE check if `tensor` contains an Inf or a NaN value.
+//
+//   name:   variable name, used only in the error message.
+//   tensor: tensor to inspect.
+//
+// Tensors that hold no memory, and tensors whose element type is neither
+// float nor double, are skipped without any check.
+static void CheckTensorNANOrInf(const std::string& name,
+                                const framework::Tensor& tensor) {
+  // Test for an empty tensor before asking for its element type: an output
+  // variable that was never allocated has no type to report.
+  // NOTE(review): assumes Tensor::type() requires allocated memory while
+  // memory_size() does not -- confirm against tensor.h.
+  if (tensor.memory_size() == 0) {
+    return;
+  }
+  const auto& element_type = tensor.type();
+  const bool is_float = element_type.hash_code() == typeid(float).hash_code();
+  const bool is_double =
+      element_type.hash_code() == typeid(double).hash_code();
+  if (!is_float && !is_double) {
+    return;
+  }
+  PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name);
+  PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name);
+}
+
 void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
                   bool create_local_scope, bool create_vars) {
  // TODO(tonyyang-svail):
@ -101,6 +113,15 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
    auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
    VLOG(3) << op->DebugString();
    op->Run(*local_scope, place_);
+    if (FLAGS_check_nan_inf) {
+      for (auto& vname : op->OutputVars(true)) {
+        auto* var = local_scope->FindVar(vname);
+        if (var == nullptr) continue;
+        if (var->IsType<framework::LoDTensor>()) {
+          CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
+        }
+      }
+    }
  }
  if (create_local_scope) {
    scope->DeleteScope(local_scope);
--- a/paddle/framework/init.cc
+++ b/paddle/framework/init.cc
@ -71,7 +71,7 @@ bool InitDevices(const std::vector<std::string> &devices) {
    places.emplace_back(platform::CPUPlace());
    LOG(WARNING) << "Not specified CPU device, create CPU by Default.";
  }
-  platform::DeviceContextPool::Create(places);
+  platform::DeviceContextPool::Init(places);
  return true;
 }

--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@ -15,6 +15,7 @@ limitations under the License. */
 #include <algorithm>
 #include <atomic>

+#include "paddle/framework/data_transform.h"
 #include "paddle/framework/executor.h"
 #include "paddle/framework/lod_tensor_array.h"
 #include "paddle/framework/operator.h"
@ -387,8 +388,8 @@ void OperatorWithKernel::Run(const Scope& scope,
                             const platform::Place& place) const {
  RuntimeInferShapeContext infer_shape_ctx(*this, scope);
  this->InferShape(&infer_shape_ctx);
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Get();
-  auto dev_ctx = pool.Borrow(place);
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  auto dev_ctx = pool.Get(place);

  // check if op[type] has kernel registered.
  auto& all_op_kernels = AllOpKernels();
@ -411,7 +412,38 @@ void OperatorWithKernel::Run(const Scope& scope,
                 expected_kernel_key);
  }

-  kernel_iter->second->Compute(ctx);
+  if (actual_kernel_key == expected_kernel_key) {
+    kernel_iter->second->Compute(ctx);
+  } else {
+    Scope& op_scope = scope.NewScope();
+    auto input_vars = this->InputVars();
+    for (auto var_name : input_vars) {
+      op_scope.Var(var_name);
+    }
+
+    // TODO(qijun) get appropriate DeviceContext from DeviceContext pool
+    platform::DeviceContext* trans_dev_ctx = nullptr;
+    std::vector<platform::DeviceContext*> trans_dev_ctx_vec{trans_dev_ctx};
+
+    // TODO(qijun) get appropriate DataTransformFN from global map
+    framework::DataTransformFN trans_fun = nullptr;
+
+    // Wait for transform starting
+    dev_ctx->Wait();
+
+    for (auto var_name : input_vars) {
+      trans_fun(trans_dev_ctx_vec, *(scope.FindVar(var_name)),
+                op_scope.FindVar(var_name));
+    }
+    // Wait for data transform finishing
+    for (auto ctx : trans_dev_ctx_vec) {
+      ctx->Wait();
+    }
+
+    // Create a new ExecutionContext
+    ExecutionContext op_ctx(*this, op_scope, *dev_ctx);
+    kernel_iter->second->Compute(op_ctx);
+  }
 }

 OpKernelType OperatorWithKernel::GetActualKernelType(
--- a/paddle/framework/tensor_util.h
+++ b/paddle/framework/tensor_util.h
@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#include "paddle/framework/data_type.h"
+#include "paddle/framework/eigen.h"
 #include "paddle/framework/tensor.h"
+#include "paddle/platform/device_context.h"

 namespace paddle {
 namespace framework {
@ -205,5 +208,100 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) {
               src_ptr, size);
 }

+// Data-type visitor used by AnyImpl. It stores a predicate, the tensor to
+// inspect, a device context and an output tensor; VisitDataType then invokes
+// operator()<T> with T set to the tensor's element type.
+template <typename Predicate, typename DevCtx>
+struct AnyDTypeVisitor {
+  Predicate predicate_;
+  const Tensor& tensor_;
+  const DevCtx& ctx_;
+  Tensor* out_;
+
+  AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
+                  Tensor* out)
+      : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}
+
+  // Views tensor_ as a flat Eigen vector of T, applies the predicate to it and
+  // reduces the result with any() into the bool scalar stored in *out_. The
+  // expression is evaluated on ctx_'s Eigen device.
+  template <typename T>
+  void operator()() const {
+    auto t = EigenVector<T>::Flatten(tensor_);
+    auto o = EigenScalar<bool>::From(*out_);
+    o.device(*ctx_.eigen_device()) = predicate_(t).any();
+  }
+};
+
+// Evaluates "does any element of `tensor` satisfy `predicate`" on the device
+// described by `ctx` and stores the answer in `out`. The element type is taken
+// from tensor.type() and dispatched through VisitDataType.
+template <typename Predicate, typename DevCtx>
+inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
+                    const DevCtx& ctx, framework::Tensor* out) {
+  AnyDTypeVisitor<Predicate, DevCtx> dtype_visitor(predicate, tensor, ctx, out);
+  VisitDataType(ToDataType(tensor.type()), dtype_visitor);
+}
+
+// Place visitor used by Any(). For the place the tensor lives on, it runs the
+// predicate reduction there and returns the resulting bool to the caller.
+template <typename Predicate>
+struct AnyVisitor : public boost::static_visitor<bool> {
+  const framework::Tensor& tensor_;
+  Predicate predicate_;
+
+  AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
+      : tensor_(tensor), predicate_(std::move(predicate)) {}
+
+  // Allocates a one-element bool tensor on `place`, fetches the device context
+  // for that place from the pool, runs AnyImpl into the tensor and hands it to
+  // the GetResult overload matching the place type.
+  // NOTE(review): this uses GetByPlace() while GetResult uses Get();
+  // presumably GetByPlace returns the place-specific context type -- confirm.
+  template <typename Place>
+  bool operator()(const Place& place) const {
+    framework::Tensor out;
+    out.Resize({1});
+    out.mutable_data<bool>(place);
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
+    AnyImpl(predicate_, tensor_, *ctx, &out);
+    return this->GetResult(out, place);
+  }
+
+  // CUDA result: copy the one-element result into a CPU tensor and read it
+  // there. The device context for `gpu` is waited on both before and after the
+  // copy.
+  bool GetResult(const framework::Tensor& out,
+                 const platform::CUDAPlace& gpu) const {
+    platform::CPUPlace cpu;
+    framework::Tensor tmp;
+    tmp.Resize({1});
+    tmp.mutable_data<bool>(cpu);
+    platform::DeviceContextPool::Instance().Get(gpu)->Wait();
+    CopyFrom(out, cpu, &tmp);
+    platform::DeviceContextPool::Instance().Get(gpu)->Wait();
+    return GetResult(tmp, cpu);
+  }
+
+  // CPU result: the bool can be read directly from the tensor's data.
+  bool GetResult(const framework::Tensor& out,
+                 const platform::CPUPlace& cpu) const {
+    return *out.data<bool>();
+  }
+};
+
+// Returns true when `predicate` holds for at least one element of `tensor`.
+// The work is dispatched on the tensor's place through AnyVisitor.
+template <typename Predicate>
+inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
+  auto tensor_place = tensor.place();
+  AnyVisitor<Predicate> place_visitor(tensor, predicate);
+  return platform::VisitPlace(tensor_place, place_visitor);
+}
+
+// Predicate for Any(): forwards to the argument's isnan() member. The return
+// type is whatever isnan() yields for T.
+struct HasNANPredicate {
+  template <typename T>
+  auto operator()(const T& eigen_vec) const
+      -> decltype(std::declval<T>().isnan()) {
+    return eigen_vec.isnan();
+  }
+};
+
+// Returns true when at least one element of `tensor` is NaN.
+inline bool HasNAN(const framework::Tensor& tensor) {
+  return Any(tensor, HasNANPredicate());
+}
+
+// Predicate for Any(): forwards to the argument's isinf() member. The return
+// type is whatever isinf() yields for T.
+struct HasInfPredicate {
+  template <typename T>
+  auto operator()(const T& eigen_vec) const
+      -> decltype(std::declval<T>().isinf()) {
+    return eigen_vec.isinf();
+  }
+};
+
+// Returns true when at least one element of `tensor` is infinite.
+inline bool HasInf(const framework::Tensor& tensor) {
+  return Any(tensor, HasInfPredicate());
+}
+
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/framework/tensor_util_test.cc
+++ b/paddle/framework/tensor_util_test.cc
@ -13,6 +13,7 @@

 #include "paddle/framework/tensor_util.h"
 #include <gtest/gtest.h>
+#include <cmath>
 #include <string>

 namespace paddle {
@ -230,5 +231,28 @@ TEST(CopyToVector, Tensor) {
 #endif
 }

+// A three-element float tensor on the CPU with a NaN in the middle must be
+// reported by HasNAN.
+TEST(IsNAN, CPU) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+  Tensor src;
+  float* data = src.mutable_data<float>({3}, CPUPlace());
+  data[0] = 0.0f;
+  data[1] = NAN;
+  data[2] = 0.0f;
+
+  ASSERT_TRUE(HasNAN(src));
+}
+
+// A three-element double tensor on the CPU with an infinity in the middle
+// must be reported by HasInf.
+TEST(IsInf, CPU) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+  Tensor src;
+  double* data = src.mutable_data<double>({3}, CPUPlace());
+  data[0] = 1.0;
+  data[1] = INFINITY;
+  data[2] = 0.0;
+
+  ASSERT_TRUE(HasInf(src));
+}
+
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/gserver/layers/MKLDNNLRNLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNLRNLayer.cpp
@ -29,7 +29,7 @@ bool MKLDNNLRNLayer::init(const LayerMap& layerMap,
  }

  /* the size of inputs for norm-layer is 1 */
-  CHECK_EQ(config_.inputs_size(), 1UL);
+  CHECK_EQ(config_.inputs_size(), 1);
  const NormConfig& conf = config_.inputs(0).norm_conf();
  localSize_ = conf.size();
  alpha_ = conf.scale();
--- a/paddle/operators/activation_op.cc
+++ b/paddle/operators/activation_op.cc
--- a/paddle/operators/activation_op.h
+++ b/paddle/operators/activation_op.h
--- a/paddle/operators/array_operator.h
+++ b/paddle/operators/array_operator.h
@ -35,8 +35,8 @@ class ArrayOp : public framework::OperatorBase {
    PADDLE_ENFORCE_EQ(i_tensor.numel(), 1);

    // get device context from pool
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-    auto &dev_ctx = *pool.Borrow(place);
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(place);

    size_t offset;
    if (platform::is_gpu_place(i_tensor.place())) {
--- a/paddle/operators/array_to_lod_tensor_op.cc
+++ b/paddle/operators/array_to_lod_tensor_op.cc
@ -106,8 +106,9 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
        }
        auto slice = out->Slice(out_offset, out_offset + len);

-        platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-        auto &dev_ctx = *pool.Borrow(place);
+        platform::DeviceContextPool &pool =
+            platform::DeviceContextPool::Instance();
+        auto &dev_ctx = *pool.Get(place);

        framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place,
                            dev_ctx, &slice);
--- a/paddle/operators/assign_op.cc
+++ b/paddle/operators/assign_op.cc
@ -82,8 +82,8 @@ class AssignOp : public framework::OperatorBase {
        out != nullptr,
        "The Output(Out) should not be null if the Input(X) is set.");

-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-    auto &dev_ctx = *pool.Borrow(place);
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(place);

    framework::VisitVarType(*x, AssignFunctor(out, dev_ctx));
  }
--- a/paddle/operators/beam_search_decode_op.cc
+++ b/paddle/operators/beam_search_decode_op.cc
@ -57,8 +57,8 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope& scope,
           const platform::Place& dev_place) const override {
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Get();
-    auto& dev_ctx = *pool.Borrow(dev_place);
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto& dev_ctx = *pool.Get(dev_place);

    framework::ExecutionContext ctx(*this, scope, dev_ctx);

--- a/paddle/operators/cond_op.cc
+++ b/paddle/operators/cond_op.cc
@ -195,8 +195,8 @@ void CondOp::MergeDataFromSubnet(const framework::Scope& scope,

 void CondOp::Run(const Scope& scope, const platform::Place& place) const {
  // get device context from pool
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Get();
-  auto& dev_ctx = *pool.Borrow(place);
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  auto& dev_ctx = *pool.Get(place);

  PrepareDataForSubnet(scope, dev_ctx);
  std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
--- a/paddle/operators/feed_op.cc
+++ b/paddle/operators/feed_op.cc
@ -49,8 +49,8 @@ class FeedOp : public framework::OperatorBase {
    auto *out_item = out_var->GetMutable<framework::FeedFetchType>();

    // get device context from pool
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-    auto &dev_ctx = *pool.Borrow(place);
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(place);

    framework::CopyFrom(feed_item, place, dev_ctx, out_item);
    out_item->set_lod(feed_item.lod());
--- a/paddle/operators/fetch_op.cc
+++ b/paddle/operators/fetch_op.cc
@ -52,8 +52,8 @@ class FetchOp : public framework::OperatorBase {

    // FIXME(yuyang18): Should we assume the fetch operator always generate
    // CPU outputs?
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-    auto &dev_ctx = *pool.Borrow(place);
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(place);

    CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item);
    dev_ctx.Wait();
--- a/paddle/operators/fill_constant_op.cc
+++ b/paddle/operators/fill_constant_op.cc
@ -49,8 +49,8 @@ class FillConstantOp : public framework::OperatorBase {
      out.mutable_data(dev_place, framework::ToTypeIndex(data_type));
    }

-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-    auto &dev_ctx = *pool.Borrow(dev_place);
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(dev_place);
    math::set_constant(dev_ctx, &out, value);
  }
 };
--- a/paddle/operators/fill_op.cc
+++ b/paddle/operators/fill_op.cc
@ -69,8 +69,9 @@ class FillOp : public framework::OperatorBase {

    if (!force_cpu && platform::is_gpu_place(place)) {
      // Copy tensor to out
-      platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-      auto &dev_ctx = *pool.Borrow(place);
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto &dev_ctx = *pool.Get(place);
      framework::CopyFrom(tensor, place, dev_ctx, &out);
    }
  }
--- a/paddle/operators/load_op.cc
+++ b/paddle/operators/load_op.cc
@ -40,8 +40,8 @@ class LoadOp : public framework::OperatorBase {
    auto *tensor = out_var->GetMutable<framework::LoDTensor>();
    framework::DeserializeFromStream(fin, tensor);

-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-    auto &dev_ctx = *pool.Borrow(place);
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(place);

    if (platform::is_gpu_place(place)) {
      // copy CPU to GPU
--- a/paddle/operators/lod_tensor_to_array_op.cc
+++ b/paddle/operators/lod_tensor_to_array_op.cc
@ -88,8 +88,9 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
        auto slice = out[i].Slice(static_cast<int>(offset),
                                  static_cast<int>(offset + len));

-        platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-        auto &dev_ctx = *pool.Borrow(place);
+        platform::DeviceContextPool &pool =
+            platform::DeviceContextPool::Instance();
+        auto &dev_ctx = *pool.Get(place);

        framework::CopyFrom(x.Slice(static_cast<int>(each_range.begin),
                                    static_cast<int>(each_range.end)),
--- a/paddle/operators/merge_lod_tensor_op.cc
+++ b/paddle/operators/merge_lod_tensor_op.cc
@ -30,8 +30,8 @@ class MergeLoDTensorOp : public framework::OperatorBase {
  void Run(const framework::Scope &scope,
           const platform::Place &dev_place) const override {
    // get device context from pool
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Get();
-    auto &dev_ctx = *pool.Borrow(dev_place);
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(dev_place);

    auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto &mask = scope.FindVar(Input("Mask"))->Get<framework::LoDTensor>();
--- a/paddle/operators/nccl_op_test.cu.cc
+++ b/paddle/operators/nccl_op_test.cu.cc
@ -305,7 +305,7 @@ int main(int argc, char **argv) {
  }

  VLOG(0) << " DeviceCount " << count;
-  paddle::platform::DeviceContextPool::Create(places);
+  paddle::platform::DeviceContextPool::Init(places);

  testing::InitGoogleTest(&argc, argv);

--- a/Show More
+++ b/Show More