Refine error msg in paddle/fluid/framework/details [part 2] (#27429)

* refine broadcast_op_handle * refine some error messages * refine some files * fix bug * fix bug * fix bug * follow comments * follow comments
5 years ago · 35074963e3
parent 162b4d6c13
commit 35074963e3
18 changed files with 475 additions and 172 deletions
--- a/paddle/fluid/framework/details/all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@ -76,7 +76,7 @@ void AllReduceOpHandle::AllReduceImpl(
                    platform::errors::InvalidArgument(
                        "The NoDummyInputSize should be equal "
                        "to the number of places, but got NoDummyInputSize is "
-                        "%d and the number of place is %d.",
+                        "%d and the number of places is %d.",
                        in_var_handles.size(), num_places));
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), out_var_handles.size(),
@ -89,7 +89,7 @@ void AllReduceOpHandle::AllReduceImpl(
      platform::errors::InvalidArgument(
          "The number of local scopes should be equal "
          "to the number of places, but got the number of local scopes is "
-          "%d and the number of place is %d.",
+          "%d and the number of places is %d.",
          in_var_handles.size(), num_places));
  std::vector<const void *> lod_tensor_data;
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/broadcast_op_handle.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/profiler.h"
@ -31,10 +32,15 @@ void BroadcastOpHandle::RunImpl() {
  auto out_var_handles = DynamicCast<VarHandle>(outputs_);
  PADDLE_ENFORCE_EQ(in_var_handles.size(), 1UL,
-                    "The number of input should be one.");
+                    platform::errors::PreconditionNotMet(
-  PADDLE_ENFORCE_EQ(
+                        "The number of inputs should be 1, but got %d.",
-      out_var_handles.size(), places_.size(),
+                        in_var_handles.size()));
-      "The number of output should equal to the number of places.");
+  PADDLE_ENFORCE_EQ(out_var_handles.size(), places_.size(),
                    platform::errors::PreconditionNotMet(
                        "The number of outputs and the number of places should "
                        "be equal, but got the number of outputs is %d and the "
                        "number of places is %d.",
                        out_var_handles.size(), places_.size()));
  VarHandle *in_var_handle = in_var_handles[0];
@ -47,7 +53,9 @@ void BroadcastOpHandle::BroadcastOneVar(
    const std::vector<Scope *> &var_scopes) {
  auto *in_var =
      var_scopes.at(in_var_handle.scope_idx())->FindVar(in_var_handle.name());
-  PADDLE_ENFORCE_NOT_NULL(in_var);
+  PADDLE_ENFORCE_NOT_NULL(
      in_var, platform::errors::NotFound("Variable %s is not found in scopes.",
                                         in_var_handle.name()));
  Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
  if (UNLIKELY(!in_tensor.IsInitialized())) {
    VLOG(3) << "in var " << in_var_handle.name() << "not inited, return!";
@ -103,7 +111,7 @@ void BroadcastOpHandle::BroadcastOneVar(
      broadcast_calls.emplace_back(
          [send_recv_buffer, numel, type, root_id, &nccl_ctx] {
-            PADDLE_ENFORCE(platform::dynload::ncclBcast(
+            PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast(
                send_recv_buffer, numel, static_cast<ncclDataType_t>(type),
                root_id, nccl_ctx.comm_, nccl_ctx.stream()));
          });
@ -131,7 +139,8 @@ void BroadcastOpHandle::BroadcastOneVar(
      nccl_ctxs_->DevCtx(p)->Wait();
    }
 #else
-    PADDLE_THROW("CUDA is not enabled.");
+    PADDLE_THROW(
        platform::errors::PreconditionNotMet("Not compiled with NCLL."));
 #endif
  }
 }
@ -154,10 +163,13 @@ void BroadcastOpHandle::InitOutputValue(
    auto t_out_p = out_var_handle->place();
    auto *out_var = var_scopes.at(out_var_handle->scope_idx())
                        ->FindVar(out_var_handle->name());
-    PADDLE_ENFORCE_NOT_NULL(out_var);
+    PADDLE_ENFORCE_NOT_NULL(out_var, platform::errors::NotFound(
                                         "Variable %s is not found in scopes.",
                                         out_var_handle->name()));
    if (is_gpu_place(in_tensor.place())) {
-      PADDLE_ENFORCE(platform::is_gpu_place(t_out_p),
+      PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true,
-                     "Places of input and output must be all on GPU.");
+                        platform::errors::PreconditionNotMet(
                            "Places of input and output must be all on GPU."));
    } else {
      t_out_p = platform::CPUPlace();
    }
--- a/paddle/fluid/framework/details/broadcast_op_handle_test.h
+++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h
@ -79,7 +79,8 @@ struct TestBroadcastOpHandle {
      }
      nccl_ctxs_.reset(new platform::NCCLContextMap(place_list_));
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
          platform::errors::PreconditionNotMet("Not compiled with NCLL."));
 #endif
    } else {
      int count = 8;
@ -113,7 +114,8 @@ struct TestBroadcastOpHandle {
      op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                         place_list_, nccl_ctxs_.get());
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
          platform::errors::PreconditionNotMet("Not compiled with NCLL."));
 #endif
    } else {
 #if defined(PADDLE_WITH_NCCL)
@ -171,7 +173,9 @@ struct TestBroadcastOpHandle {
                                   float val_scalar = 0.0) {
    auto var = param_scopes_[input_scope_idx]->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
        var, platform::errors::NotFound("Variable %s is not found in scope.",
                                        varname));
    auto lod_tensor = var->GetMutable<f::LoDTensor>();
    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
    for (size_t k = 0; k < send_vector.size(); ++k) {
@ -194,7 +198,9 @@ struct TestBroadcastOpHandle {
    }
    auto var = param_scopes_[input_scope_idx]->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
        var, platform::errors::NotFound("Variable %s is not found in scope.",
                                        varname));
    auto selected_rows = var->GetMutable<f::SelectedRows>();
    auto value = selected_rows->mutable_value();
    value->mutable_data<float>(kDims, place_list_[input_scope_idx]);
@ -211,13 +217,24 @@ struct TestBroadcastOpHandle {
                         const std::vector<float>& send_vector,
                         const std::vector<int64_t>& rows, int height) {
    auto var = param_scopes_[input_scope_idx]->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
        var, platform::errors::NotFound("Variable %s is not found in scope.",
                                        varname));
    auto& selected_rows = var->Get<f::SelectedRows>();
    auto rt = selected_rows.value();
-    PADDLE_ENFORCE_EQ(selected_rows.height(), height, "height is not equal.");
+    PADDLE_ENFORCE_EQ(selected_rows.height(), height,
                      platform::errors::InvalidArgument(
                          "The height of SelectedRows is not equal to "
                          "the expected, expect %d, but got %ld.",
                          height, selected_rows.height()));
    for (size_t k = 0; k < selected_rows.rows().size(); ++k) {
-      PADDLE_ENFORCE_EQ(selected_rows.rows()[k], rows[k]);
+      PADDLE_ENFORCE_EQ(
          selected_rows.rows()[k], rows[k],
          platform::errors::InvalidArgument(
              "The item at position %zu of rows of SelectedRows "
              "is not equal to the expected, expect %ld, but got %ld.",
              k, rows[k], selected_rows.rows()[k]));
    }
    p::CPUPlace cpu_place;
@ -235,9 +252,15 @@ struct TestBroadcastOpHandle {
                      framework::Scope* scope) {
    p::CPUPlace cpu_place;
    auto var = scope->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
        var, platform::errors::NotFound("Variable %s is not found in scope.",
                                        varname));
    auto tensor = var->Get<f::LoDTensor>();
-    PADDLE_ENFORCE_EQ(tensor.lod(), lod, "lod is not equal.");
+    PADDLE_ENFORCE_EQ(tensor.lod(), lod,
                      platform::errors::InvalidArgument(
                          "The LoD of tensor is not equal to "
                          "the expected, expect %s, but got %s.",
                          lod, tensor.lod()));
    f::Tensor result_tensor;
    f::TensorCopySync(tensor, cpu_place, &result_tensor);
    float* ct = result_tensor.mutable_data<float>(cpu_place);
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@ -235,7 +235,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
              AppendPass("reduce_mode_multi_devices_pass").get();
          break;
        default:
-          PADDLE_THROW("Unknown reduce strategy.");
+          PADDLE_THROW(
              platform::errors::Unimplemented("Unknown reduce strategy."));
      }
    }
    multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy",
--- a/paddle/fluid/framework/details/eager_deletion_op_handle.cc
+++ b/paddle/fluid/framework/details/eager_deletion_op_handle.cc
@ -12,11 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
 #include <memory>
 #include <unordered_set>
 #include <utility>
 #include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
 #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/scope.h"
@ -47,15 +48,19 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
    if (dynamic_cast<StreamGarbageCollector *>(gc_)) {
      platform::CUDADeviceGuard guard(
          BOOST_GET_CONST(platform::CUDAPlace, place).device);
-      PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
+      PADDLE_ENFORCE_CUDA_SUCCESS(
-      PADDLE_ENFORCE_NOT_NULL(event_);
+          cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
      PADDLE_ENFORCE_NOT_NULL(event_, platform::errors::InvalidArgument(
                                          "The cuda envet created is NULL."));
    }
  }
 #endif
-  PADDLE_ENFORCE_NE(vars.empty(), true, platform::errors::InvalidArgument(
+  PADDLE_ENFORCE_NE(vars.empty(), true,
-                                            "Variable names are empty."));
+                    platform::errors::InvalidArgument(
                        "The variables to be deleted are empty."));
  for (auto *var : var_infos_) {
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument(
                                     "The memory optimization info is NULL."));
  }
 }
@ -64,7 +69,7 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() {
  if (event_) {
    auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx_->GetPlace());
    platform::CUDADeviceGuard guard(gpu_place.device);
-    PADDLE_ENFORCE(cudaEventDestroy(event_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(event_));
  }
 #endif
 }
@ -78,12 +83,17 @@ void EagerDeletionOpHandle::InitCUDA() {
 }
 void EagerDeletionOpHandle::CallOnce() {
-  PADDLE_ENFORCE(vars_.empty(), "vars_ must be initialized here");
+  PADDLE_ENFORCE_EQ(
      vars_.empty(), true,
      platform::errors::InvalidArgument(
          "The variables to be deleted should be initialized here."));
  Scope *exec_scope = local_exec_scopes_[0];
  for (auto *var_info : var_infos_) {
    auto *var = exec_scope->FindVar(var_info->Name());
-    PADDLE_ENFORCE_NOT_NULL(var, "Variable %s should not be nullptr",
+    PADDLE_ENFORCE_NOT_NULL(
-                            var_info->Name());
+        var, platform::errors::NotFound(
                 "The variable(%s) to be inplaced is not found in scope.",
                 var_info->Name()));
    vars_.emplace_back(var);
  }
 }
@ -119,8 +129,9 @@ void EagerDeletionOpHandle::RunImpl() {
        garbages.emplace_back(t.MoveMemoryHolder());
      }
    } else {
-      PADDLE_THROW("Type %s of %s is not supported eager deletion",
+      PADDLE_THROW(platform::errors::Unimplemented(
-                   framework::ToTypeName(var->Type()), var_info->Name());
+          "The variable(%s) of type %s is not supported in eager deletion.",
          framework::ToTypeName(var->Type()), var_info->Name()));
    }
  }
@ -137,8 +148,9 @@ void EagerDeletionOpHandle::ClearGarbages(
    auto callback_stream =
        reinterpret_cast<StreamGarbageCollector *>(gc_)->stream();
    auto callback_func = [=]() {
-      PADDLE_ENFORCE(cudaEventRecord(event_, compute_stream));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventRecord(event_, compute_stream));
-      PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event_, 0));
+      PADDLE_ENFORCE_CUDA_SUCCESS(
          cudaStreamWaitEvent(callback_stream, event_, 0));
    };
    gc_->Add(std::move(*garbages), callback_func);
  } else {
--- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h"
 #include <algorithm>
 #include <utility>
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
@ -56,10 +58,20 @@ void FusedAllReduceOpHandle::RunImpl() {
  size_t place_num = places_.size();
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), place_num * num_of_all_reduce_,
-      "The NoDummyInputSize should be equal to the number of places.");
+      platform::errors::PreconditionNotMet(
          "The number of input variable handles should be equal to the number "
          "of places plus the number of all reduce handles, "
          "but got the number of input variable handles is %d, the "
          "number of places is %d, and the number of all reduce handles "
          "is %d.",
          in_var_handles.size(), place_num, num_of_all_reduce_));
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), out_var_handles.size(),
-      "The NoDummyInputSize and NoDummyOutputSize should be equal.");
+      platform::errors::PreconditionNotMet(
          "The number of input variable handles should be equal to the number "
          "of output variable handles, but got the number of input variable "
          "handles is %d, and the number of  output variable handles is %d.",
          in_var_handles.size(), out_var_handles.size()));
  // Note: some gradient op doesn't have CUDAKernel, so the gradients of
  // those op are in CPUPlace, in this case, the all reduce should not be fused.
@ -106,7 +118,13 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
      dtype = ele_dtype;
    }
-    PADDLE_ENFORCE_EQ(ele_dtype, dtype);
+    PADDLE_ENFORCE_EQ(
        ele_dtype, dtype,
        platform::errors::InvalidArgument(
            "The DataType of grad tensors of fused_all_reduce_op_handle  "
            "must be consistent. The current dtype is %s, but the "
            "previous dtype is %s.",
            DataTypeToString(ele_dtype), DataTypeToString(dtype)));
    // Check whether the address space is contiguous.
    std::sort(
@ -130,16 +148,29 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
          "input[%d] address: 0X%02x. The offset: %d",
          k - 1, g_tensor.at(k - 1).first, cur_address, g_tensor.at(k).first, k,
          next_address, k, infer_next_address, offset);
-      PADDLE_ENFORCE_EQ(infer_next_address, next_address,
+      PADDLE_ENFORCE_EQ(
-                        "The address is not consistent.");
+          infer_next_address, next_address,
          platform::errors::InvalidArgument(
              "The infered address of the next tensor should be equal to the "
              "real address of the next tensor. But got infered address is %p "
              "and real address is %p.",
              infer_next_address, next_address));
    }
  }
  if (!FLAGS_skip_fused_all_reduce_check) {
    for (size_t scope_idx = 0; scope_idx < place_num; ++scope_idx) {
      for (size_t j = 1; j < num_of_all_reduce_; ++j) {
-        PADDLE_ENFORCE_EQ(grads_tensor.at(0).at(j).first,
+        PADDLE_ENFORCE_EQ(
-                          grads_tensor.at(scope_idx).at(j).first);
+            grads_tensor.at(0).at(j).first,
            grads_tensor.at(scope_idx).at(j).first,
            platform::errors::InvalidArgument(
                "The variable name of grad tensors of "
                "fused_all_reduce_op_handle  "
                "must be consistent. The current name is %s, but the "
                "previous name is %s.",
                grads_tensor.at(0).at(j).first,
                grads_tensor.at(scope_idx).at(j).first));
      }
    }
  }
@ -167,7 +198,9 @@ bool FusedAllReduceOpHandle::InputIsInDifferentPlace(
    for (size_t j = 0; j < in_var_handles.size(); j += place_num) {
      auto var_name = in_var_handles[j]->name();
      auto var = local_scope->FindVar(var_name);
-      PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name);
+      PADDLE_ENFORCE_NOT_NULL(
          var, platform::errors::NotFound(
                   "The variable '%s' is not found in local scope.", var_name));
      auto &lod_tensor = var->Get<LoDTensor>();
      if (!is_same_place(lod_tensor.place(), places_.at(scope_idx))) {
        return true;
@ -185,14 +218,24 @@ void FusedAllReduceOpHandle::GetGradLoDTensor(
  size_t place_num = places_.size();
  for (size_t j = 0; j < in_var_handles.size(); j += place_num) {
    auto var_name = in_var_handles[j]->name();
-    PADDLE_ENFORCE_EQ(var_name, out_var_handles[j]->name());
+    PADDLE_ENFORCE_EQ(
        var_name, out_var_handles[j]->name(),
        platform::errors::InvalidArgument(
            "The name of input variable should be equal "
            "to the name of output variable. But got the name of input "
            "variable is %s and the name of output variable is %s.",
            var_name, out_var_handles[j]->name()));
    auto var = local_scope->FindVar(var_name);
-    PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name);
+    PADDLE_ENFORCE_NOT_NULL(
        var, platform::errors::NotFound(
                 "The variable '%s' is not found in local scope.", var_name));
    auto &lod_tensor = var->Get<LoDTensor>();
    PADDLE_ENFORCE_EQ(
        platform::is_same_place(lod_tensor.place(), places_.at(scope_idx)),
-        true, "%s(%d) is not in the right place.", var_name, scope_idx);
+        true, platform::errors::InvalidArgument(
                  "The variable '%s' at scope %d is not in the right place.",
                  var_name, scope_idx));
    grad_tensor->emplace_back(std::make_pair(var_name, &lod_tensor));
  }
 }
@ -204,16 +247,26 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
  size_t size_of_dtype = 0;
  for (size_t i = 0; i < grad_tensor.size(); ++i) {
    // Get dtype
-    auto ele_type = grad_tensor.at(i).second->type();
+    auto ele_dtype = grad_tensor.at(i).second->type();
    if (i == 0) {
-      *dtype = ele_type;
+      *dtype = ele_dtype;
-      size_of_dtype = framework::SizeOfType(ele_type);
+      size_of_dtype = framework::SizeOfType(ele_dtype);
    }
-    PADDLE_ENFORCE_EQ(ele_type, *dtype);
+    PADDLE_ENFORCE_EQ(
        ele_dtype, *dtype,
        platform::errors::InvalidArgument(
            "The DataType of grad tensors of fused_all_reduce_op_handle  "
            "must be consistent. The current dtype is %s, but the "
            "previous dtype is %s.",
            DataTypeToString(ele_dtype), DataTypeToString(*dtype)));
    // Get element number
    int64_t len = grad_tensor.at(i).second->numel();
-    PADDLE_ENFORCE_GT(len, 0);
+    PADDLE_ENFORCE_GT(
        len, 0, platform::errors::InvalidArgument(
                    "The size of grad tensors of fused_all_reduce_op_handle  "
                    "must be > 0, but got %d.",
                    len));
    *numel +=
        platform::Alignment(len * size_of_dtype, places_[0]) / size_of_dtype;
  }
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc
@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/profiler.h"
@ -32,7 +33,15 @@ void FusedBroadcastOpHandle::RunImpl() {
  WaitInputVarGenerated();
  size_t place_num = places_.size();
-  PADDLE_ENFORCE_EQ(in_var_handles.size() * place_num, out_var_handles.size());
+  PADDLE_ENFORCE_EQ(
      in_var_handles.size() * place_num, out_var_handles.size(),
      platform::errors::PreconditionNotMet(
          "The number of input variable handles plus the number "
          "of places should be equal to the number of output variable handles, "
          "but got the number of input variable handles is %d, the "
          "number of places is %d, and the number of output variable handles "
          "is %d.",
          in_var_handles.size(), place_num, out_var_handles.size()));
  for (size_t i = 0; i < in_var_handles.size(); ++i) {
    BroadcastOneVar(
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
@ -13,8 +13,10 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
 #include <memory>
 #include <unordered_map>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/details/broadcast_op_handle_test.h"
 #include "paddle/fluid/framework/details/op_handle_base.h"
@ -58,7 +60,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
      op_handle_ = new FusedBroadcastOpHandle(
          nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
-      PADDLE_THROW("CUDA is not supported.");
+      PADDLE_THROW(
          platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
    } else {
 #if defined(PADDLE_WITH_NCCL)
--- a/paddle/fluid/framework/details/gather_op_handle.cc
+++ b/paddle/fluid/framework/details/gather_op_handle.cc
@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/gather_op_handle.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
@ -32,13 +33,20 @@ void GatherOpHandle::RunImpl() {
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), places_.size(),
-      "The number of output should equal to the number of places.");
+      platform::errors::InvalidArgument(
          "The number of input variables should be equal "
          "to the number of places, but got the number of input variables is "
          "%d and the number of places is %d.",
          in_var_handles.size(), places_.size()));
  VarHandle *out_var_handle;
  {
    auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
-    PADDLE_ENFORCE_EQ(out_var_handles.size(), 1,
+    PADDLE_ENFORCE_EQ(
-                      "The number of output should be one.");
+        out_var_handles.size(), 1,
        platform::errors::InvalidArgument(
            "The number of output variables should be 1, but got %d.",
            out_var_handles.size()));
    out_var_handle = out_var_handles.front();
  }
@ -47,10 +55,14 @@ void GatherOpHandle::RunImpl() {
  auto in_0_handle = in_var_handles[0];
  auto pre_in_var =
      var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name());
-  PADDLE_ENFORCE_NOT_NULL(pre_in_var);
+  PADDLE_ENFORCE_NOT_NULL(
      pre_in_var,
      platform::errors::NotFound("The variable '%s' is not found in the scope.",
                                 in_0_handle->name()));
-  PADDLE_ENFORCE(pre_in_var->IsType<framework::SelectedRows>(),
+  PADDLE_ENFORCE_EQ(pre_in_var->IsType<framework::SelectedRows>(), true,
-                 "Currently, gather_op only can gather SelectedRows.");
+                    platform::errors::Unimplemented(
                        "Currently, gather_op only supports SelectedRows."));
  // Wait input done, this Wait is asynchronous operation
  WaitInputVarGenerated();
@ -63,7 +75,10 @@ void GatherOpHandle::RunImpl() {
  for (auto *in_handle : in_var_handles) {
    auto *in_var =
        var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name());
-    PADDLE_ENFORCE_NOT_NULL(in_var);
+    PADDLE_ENFORCE_NOT_NULL(
        in_var,
        platform::errors::NotFound(
            "The variable '%s' is not found in the scope.", in_handle->name()));
    VariableVisitor::EnforceShapeAndDTypeEQ(*in_var, *pre_in_var);
    auto &in_sr_value = in_var->Get<framework::SelectedRows>();
@ -76,15 +91,19 @@ void GatherOpHandle::RunImpl() {
  // NOTE: The Places of all input tensor must be all on CPU or all on GPU.
  platform::Place t_out_p = out_var_handle->place();
  if (platform::is_gpu_place(pre_in_value.place())) {
-    PADDLE_ENFORCE(platform::is_gpu_place(t_out_p),
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true,
-                   "Places of input and output must be all on GPU.");
+                      platform::errors::PreconditionNotMet(
                          "Places of input and output must be all on GPU."));
  } else {
    t_out_p = platform::CPUPlace();
  }
  auto out_var = var_scopes.at(out_var_handle->scope_idx())
                     ->FindVar(out_var_handle->name());
-  PADDLE_ENFORCE_NOT_NULL(out_var);
+  PADDLE_ENFORCE_NOT_NULL(
      out_var,
      platform::errors::NotFound("The variable '%s' is not found in the scope.",
                                 out_var_handle->name()));
  auto out_value = out_var->GetMutable<framework::SelectedRows>();
  out_value->set_height(pre_in_value.height());
  out_value->set_rows(out_rows);
--- a/paddle/fluid/framework/details/gather_op_handle_test.cc
+++ b/paddle/fluid/framework/details/gather_op_handle_test.cc
@ -13,8 +13,10 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/gather_op_handle.h"
 #include <memory>
 #include <unordered_map>
 #include "gtest/gtest.h"
 namespace paddle {
@ -60,7 +62,8 @@ struct TestGatherOpHandle {
        ctxs_.emplace_back(new p::CUDADeviceContext(p));
      }
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
          platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
    } else {
      int count = 8;
@ -141,7 +144,9 @@ struct TestGatherOpHandle {
    for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
         ++input_scope_idx) {
      auto in_var = param_scopes_.at(input_scope_idx)->FindVar("input");
-      PADDLE_ENFORCE_NOT_NULL(in_var);
+      PADDLE_ENFORCE_NOT_NULL(
          in_var, platform::errors::NotFound(
                      "The variable '%s' is not found in the scope.", "input"));
      auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
      auto value = in_selected_rows->mutable_value();
      value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
@ -155,7 +160,9 @@ struct TestGatherOpHandle {
    }
    auto out_var = param_scopes_.at(output_scope_idx)->FindVar("out");
-    PADDLE_ENFORCE_NOT_NULL(out_var);
+    PADDLE_ENFORCE_NOT_NULL(
        out_var, platform::errors::NotFound(
                     "The variable '%s' is not found in the scope.", "out"));
    auto out_selected_rows = out_var->GetMutable<f::SelectedRows>();
    auto in_var = param_scopes_.at(output_scope_idx)->FindVar("input");
@ -173,9 +180,19 @@ struct TestGatherOpHandle {
    auto& out_select_rows = out_var->Get<f::SelectedRows>();
    auto rt = out_select_rows.value();
-    PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal.");
+    PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
                      platform::errors::InvalidArgument(
                          "The height of SelectedRows is not equal to "
                          "the expected, expect %d, but got %d.",
                          height, out_select_rows.height()));
    for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
-      PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]);
+      PADDLE_ENFORCE_EQ(
          out_select_rows.rows()[k], rows[k % rows.size()],
          platform::errors::InvalidArgument(
              "The item at position %d of rows of SelectedRows is not equal to "
              "the expected, expect %d, but got %d.",
              k, rows[k % rows.size()], out_select_rows.rows()[k]));
    }
    f::Tensor result_tensor;
@ -207,6 +224,7 @@ TEST(GatherTester, TestGPUGatherTestSelectedRows) {
  test_op.TestGatherSelectedRows(input_scope_idx);
 }
 #endif
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/details/nccl_op_handle.h
+++ b/paddle/fluid/framework/details/nccl_op_handle.h
@ -46,14 +46,17 @@ class NCCLOpHandleBase : public OpHandleBase {
  }
  virtual ~NCCLOpHandleBase() {
    for (auto& ev : inter_events_) {
-      PADDLE_ENFORCE(cudaEventDestroy(ev.second));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second));
    }
    for (auto& ev : exter_events_) {
-      PADDLE_ENFORCE(cudaEventDestroy(ev.second));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second));
    }
  }
  void SetRunEnv(int run_order, bool use_hierarchical_allreduce) {
-    PADDLE_ENFORCE(run_order >= 0, "run_order must >= 0");
+    PADDLE_ENFORCE_GE(
        run_order, 0,
        platform::errors::InvalidArgument(
            "The argument run_order must be >= 0, but got %d.", run_order));
    run_order_ = run_order;
    use_hierarchical_allreduce_ = use_hierarchical_allreduce;
@ -74,8 +77,11 @@ class NCCLOpHandleBase : public OpHandleBase {
      return;
    }
-    PADDLE_ENFORCE(places_.size() == 1,
+    PADDLE_ENFORCE_EQ(places_.size(), 1,
-                   "HierarchicalAllReduce run one proc with one card mode.");
+                      platform::errors::InvalidArgument(
                          "HierarchicalAllReduce can only run "
                          "one proccess with one card mode, but got %d cards.",
                          places_.size()));
    for (auto& p : places_) {
      auto ctxs = nccl_ctxs_->GetHierarchicalInterCtx(run_order);
@ -88,11 +94,11 @@ class NCCLOpHandleBase : public OpHandleBase {
        continue;
      }
-      PADDLE_ENFORCE(cudaSetDevice(dev_id));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
-      PADDLE_ENFORCE(cudaEventCreateWithFlags(&inter_events_[dev_id],
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags(
-                                              cudaEventDisableTiming));
+          &inter_events_[dev_id], cudaEventDisableTiming));
-      PADDLE_ENFORCE(cudaEventCreateWithFlags(&exter_events_[dev_id],
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags(
-                                              cudaEventDisableTiming));
+          &exter_events_[dev_id], cudaEventDisableTiming));
      VLOG(10) << "Create events on dev_id:" << dev_id
               << ", inter_event:" << &inter_events_[dev_id]
               << ", exter_event:" << &exter_events_[dev_id];
@ -102,7 +108,10 @@ class NCCLOpHandleBase : public OpHandleBase {
  void FlatNCCLAllReduce(platform::Place place, const void* sendbuff,
                         void* recvbuff, size_t count, ncclDataType_t datatype,
                         ncclRedOp_t op) {
-    PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0");
+    PADDLE_ENFORCE_GE(
        run_order_, 0,
        platform::errors::InvalidArgument(
            "The argument run_order_ must be >= 0, but got %d.", run_order_));
    auto flat_nccl_ctxs = nccl_ctxs_->GetFlatCtx(run_order_);
    int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
    auto& nccl_ctx = flat_nccl_ctxs->at(dev_id);
@ -113,14 +122,17 @@ class NCCLOpHandleBase : public OpHandleBase {
             << ", dev_id:" << dev_id << ", dtype:" << datatype
             << ", place:" << place;
-    PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce(
        sendbuff, recvbuff, count, datatype, op, comm, stream));
  }
  void NCCLAllReduce(platform::Place place, const void* sendbuff,
                     void* recvbuff, size_t count, ncclDataType_t datatype,
                     ncclRedOp_t op) {
-    PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0");
+    PADDLE_ENFORCE_GE(
        run_order_, 0,
        platform::errors::InvalidArgument(
            "The argument run_order_ must be >= 0, but got %d.", run_order_));
    if (!use_hierarchical_allreduce_) {
      FlatNCCLAllReduce(place, sendbuff, recvbuff, count, datatype, op);
      return;
@ -132,7 +144,10 @@ class NCCLOpHandleBase : public OpHandleBase {
  void HierarchicalAllReduce(platform::Place place, const void* sendbuff,
                             void* recvbuff, size_t count,
                             ncclDataType_t datatype, ncclRedOp_t op) {
-    PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0");
+    PADDLE_ENFORCE_GE(
        run_order_, 0,
        platform::errors::InvalidArgument(
            "The argument run_order_ must be >= 0, but got %d.", run_order_));
    InterReduce(place, sendbuff, recvbuff, count, datatype, op);
    // When a trainer is not in exter allreduce ring
    // they need not to call this.
@ -157,14 +172,13 @@ class NCCLOpHandleBase : public OpHandleBase {
             << ", dtype:" << datatype << ", place:" << place
             << ", stream:" << stream;
-    PADDLE_ENFORCE(platform::dynload::ncclReduce(
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce(
        sendbuff, recvbuff, count, datatype, ncclSum, 0, comm, stream));
    cudaEventRecord(inter_events_.at(dev_id), stream);
    if (FLAGS_sync_nccl_allreduce) {
-      PADDLE_ENFORCE(cudaStreamSynchronize(stream),
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
                     "sync HierarchicalAllReduce inter stream error");
    }
  }
@ -172,7 +186,9 @@ class NCCLOpHandleBase : public OpHandleBase {
                      void* recvbuff, size_t count, ncclDataType_t datatype,
                      ncclRedOp_t op) {
    auto nccl_ctxs = nccl_ctxs_->GetHierarchicalExterCtx(run_order_);
-    PADDLE_ENFORCE(nccl_ctxs_, "can't get exter %d nccl_ctxs", run_order_);
+    PADDLE_ENFORCE_NOT_NULL(
        nccl_ctxs_, platform::errors::NotFound(
                        "Can't get exter %d nccl contexts.", run_order_));
    int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
    auto& nccl_ctx = nccl_ctxs->at(dev_id);
    auto stream = nccl_ctx.stream();
@ -185,14 +201,13 @@ class NCCLOpHandleBase : public OpHandleBase {
    cudaStreamWaitEvent(stream, inter_events_.at(dev_id), 0);
-    PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce(
        sendbuff, recvbuff, count, datatype, op, comm, stream));
    cudaEventRecord(exter_events_.at(dev_id), stream);
    if (FLAGS_sync_nccl_allreduce) {
-      PADDLE_ENFORCE(cudaStreamSynchronize(stream),
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
                     "sync HierarchicalAllReduce exter stream error");
    }
  }
@ -210,8 +225,8 @@ class NCCLOpHandleBase : public OpHandleBase {
             << ", stream:" << stream;
    cudaStreamWaitEvent(stream, exter_events_.at(dev_id), 0);
-    PADDLE_ENFORCE(platform::dynload::ncclBcast(sendbuff, count, datatype, 0,
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast(
-                                                comm, stream));
+        sendbuff, count, datatype, 0, comm, stream));
  }
 protected:
--- a/paddle/fluid/framework/details/op_handle_base.cc
+++ b/paddle/fluid/framework/details/op_handle_base.cc
@ -47,8 +47,8 @@ void OpHandleBase::InitCUDA() {
 #ifdef PADDLE_WITH_CUDA
  for (auto &p : dev_ctxes_) {
    int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device;
-    PADDLE_ENFORCE(cudaSetDevice(dev_id));
+    PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
-    PADDLE_ENFORCE(
+    PADDLE_ENFORCE_CUDA_SUCCESS(
        cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming));
  }
  if (IsMultiDeviceTransfer() && dev_ctxes_.size() > 0) {
@ -62,17 +62,22 @@ void OpHandleBase::InitCUDA() {
      }
    }
  } else {
-    PADDLE_ENFORCE_EQ(dev_ctxes_.size(), 1UL,
+    PADDLE_ENFORCE_EQ(
-                      "%s should have only one dev_ctx.", Name());
+        dev_ctxes_.size(), 1UL,
        platform::errors::InvalidArgument(
            "Operator %s should have only one dev_ctx, but got %d.", Name(),
            dev_ctxes_.size()));
    auto &place = dev_ctxes_.begin()->first;
    int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
    for (auto &out_var : outputs_) {
      auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
      if (out_var_handle) {
-        PADDLE_ENFORCE(platform::is_same_place(place, out_var_handle->place()),
+        PADDLE_ENFORCE_EQ(
-                       "The place of output(%s) is not consistent with the "
+            platform::is_same_place(place, out_var_handle->place()), true,
-                       "place of current op(%s).",
+            platform::errors::InvalidArgument(
-                       out_var_handle->Name(), Name());
+                "The place of output(%s) is not consistent with the "
                "place of current op(%s).",
                out_var_handle->Name(), Name()));
        out_var_handle->SetGenerateEvent(events_.at(dev_id));
      }
    }
@ -86,7 +91,10 @@ void OpHandleBase::Run(bool use_cuda) {
    InitCUDA();
  }
 #else
-  PADDLE_ENFORCE(!use_cuda);
+  PADDLE_ENFORCE_EQ(use_cuda, false,
                    platform::errors::InvalidArgument(
                        "Argument use_cuda should be false when Paddle is not "
                        "compiled with CUDA."));
 #endif
  // skip running current op, used with inplace_addto_op_pass
@ -100,17 +108,20 @@ void OpHandleBase::Run(bool use_cuda) {
 void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) {
 #ifdef PADDLE_WITH_CUDA
-  PADDLE_ENFORCE_NOT_NULL(waited_ctx);
+  PADDLE_ENFORCE_NOT_NULL(waited_ctx, platform::errors::InvalidArgument(
                                          "Argument waited_ctx is NULL."));
  if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) {
    for (auto &dev_ctx : dev_ctxes_) {
-      PADDLE_ENFORCE_NOT_NULL(dev_ctx.second);
+      PADDLE_ENFORCE_NOT_NULL(
          dev_ctx.second,
          platform::errors::InvalidArgument("The device context is NULL."));
      dev_ctx.second->Wait();
    }
  } else {
    auto stream =
        static_cast<platform::CUDADeviceContext *>(waited_ctx)->stream();
    for (auto &ev : events_) {
-      PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamWaitEvent(stream, ev.second, 0));
    }
  }
 #else
@ -145,10 +156,11 @@ void OpHandleBase::WaitInputVarGenerated() {
          auto stream =
              static_cast<platform::CUDADeviceContext *>(dev_ctxes_.at(place))
                  ->stream();
-          PADDLE_ENFORCE(
+          PADDLE_ENFORCE_CUDA_SUCCESS(
              cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0));
 #else
-          PADDLE_THROW("Doesn't compile the GPU.");
+          PADDLE_THROW(
              platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
        }
        // There are nothing to do when the place is CPUPlace.
@ -169,10 +181,11 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) {
          auto stream = static_cast<platform::CUDADeviceContext *>(
                            dev_ctxes_.at(in_var_handle->place()))
                            ->stream();
-          PADDLE_ENFORCE(
+          PADDLE_ENFORCE_CUDA_SUCCESS(
              cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0));
 #else
-          PADDLE_THROW("Doesn't compile the GPU.");
+          PADDLE_THROW(
              platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
        }
        // There are nothing to do when the place is CPUPlace.
@ -242,7 +255,9 @@ void OpHandleBase::SetLocalExecScopes(
  auto scopes = GetLocalScopes();
  for (auto *scope : scopes) {
    auto iter = scope_map.find(scope);
-    PADDLE_ENFORCE(iter != scope_map.end(), "Local scope not found");
+    PADDLE_ENFORCE_NE(
        iter, scope_map.end(),
        platform::errors::NotFound("Local scope not found in scope map."));
    local_exec_scopes_.emplace_back(iter->second);
  }
 }
--- a/paddle/fluid/framework/details/op_registry.h
+++ b/paddle/fluid/framework/details/op_registry.h
@ -21,6 +21,7 @@ limitations under the License. */
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/grad_op_desc_maker.h"
 #include "paddle/fluid/framework/inplace_op_inference.h"
 #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
@ -186,19 +187,20 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
  void operator()(const char* op_type, OpInfo* info) const {
    PADDLE_ENFORCE_EQ(info->proto_, nullptr,
                      platform::errors::AlreadyExists(
-                          "OpProto of %s has been registered", op_type));
+                          "OpProto of %s has been registered.", op_type));
    PADDLE_ENFORCE_EQ(info->checker_, nullptr,
                      platform::errors::AlreadyExists(
-                          "OpAttrChecker of %s has been registered", op_type));
+                          "OpAttrChecker of %s has been registered.", op_type));
    info->proto_ = new proto::OpProto;
    info->checker_ = new OpAttrChecker();
    T maker;
    maker(info->proto_, info->checker_);
    info->proto_->set_type(op_type);
-    PADDLE_ENFORCE(
+    PADDLE_ENFORCE_EQ(
-        info->proto_->IsInitialized(),
+        info->proto_->IsInitialized(), true,
-        "Fail to initialize %s's OpProto, because %s is not initialized",
+        platform::errors::PreconditionNotMet(
-        op_type, info->proto_->InitializationErrorString());
+            "Fail to initialize %s's OpProto, because %s is not initialized.",
            op_type, info->proto_->InitializationErrorString()));
  }
 };
--- a/paddle/fluid/framework/details/reduce_and_gather.h
+++ b/paddle/fluid/framework/details/reduce_and_gather.h
@ -16,6 +16,7 @@
 #include <algorithm>
 #include <map>
 #include <vector>
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/selected_rows.h"
@ -32,9 +33,13 @@ struct ReduceLoDTensor {
  template <typename T>
  void apply() const {
-    PADDLE_ENFORCE(!src_tensors_.empty());
+    PADDLE_ENFORCE_NE(src_tensors_.empty(), true,
                      platform::errors::InvalidArgument(
                          "The number of tensors to be reduced is 0."));
    auto &t0 = *src_tensors_[0];
-    PADDLE_ENFORCE_NE(t0.numel(), 0);
+    PADDLE_ENFORCE_NE(t0.numel(), 0,
                      platform::errors::InvalidArgument(
                          "The size of first tensor to be reduced is 0."));
    dst_tensor_.Resize(t0.dims());
    T *dst = dst_tensor_.mutable_data<T>(platform::CPUPlace());
@ -45,8 +50,19 @@ struct ReduceLoDTensor {
        continue;
      }
-      PADDLE_ENFORCE_EQ(t.dims(), t0.dims());
+      PADDLE_ENFORCE_EQ(t.dims(), t0.dims(),
-      PADDLE_ENFORCE_EQ(t.type(), t0.type());
+                        platform::errors::InvalidArgument(
                            "The shape of tensors to be reduced must be "
                            "consistent. The shape of current tensor is %s, "
                            "but the shape of the first tensor is %s.",
                            t.dims(), t0.dims()));
      PADDLE_ENFORCE_EQ(t.type(), t0.type(),
                        platform::errors::InvalidArgument(
                            "The type of tensors to be reduced must be "
                            "consistent. The type of current tensor is %s, "
                            "but the type of the first tensor is %s.",
                            t.type(), t0.type()));
      std::transform(t.data<T>(), t.data<T>() + t.numel(), dst, dst,
                     [](T a, T b) -> T { return a + b; });
    }
@ -88,7 +104,9 @@ struct GatherLocalSelectedRowsFunctor {
        in_places_(in_places),
        out_place_(out_place),
        dst_selected_rows_(dst_selected_rows) {
-    PADDLE_ENFORCE_EQ(src_selected_rows.empty(), false);
+    PADDLE_ENFORCE_NE(src_selected_rows.empty(), true,
                      platform::errors::InvalidArgument(
                          "The number of selected_rows to be gathered is 0."));
    std::vector<int64_t> out_rows;
--- a/paddle/fluid/framework/details/reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle.cc
@ -13,7 +13,9 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/reduce_op_handle.h"
 #include <memory>
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
@ -116,8 +118,15 @@ void ReduceOpHandle::GatherSelectedRows(
  merged_dev_ctx->Wait();
  scope->EraseVars(std::vector<std::string>{gathered_var_name});
-  PADDLE_ENFORCE(client->Gather(vars, &remote, *merged_dev_ctx, scope));
+  PADDLE_ENFORCE_EQ(
-  PADDLE_ENFORCE(remote.size() == vars.size());
+      client->Gather(vars, &remote, *merged_dev_ctx, scope), true,
      platform::errors::PreconditionNotMet("Gather SelectedRows failed."));
  PADDLE_ENFORCE_EQ(remote.size(), vars.size(),
                    platform::errors::PreconditionNotMet(
                        "The number of remotes should be equal to the number "
                        "of variables to be gathered, but got the number of "
                        "remotes is %d and the number of variables is %d.",
                        remote.size(), vars.size()));
  // 4. merged local selected rows.
  std::vector<const SelectedRows *> all;
@ -151,14 +160,19 @@ void ReduceOpHandle::RunImpl() {
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), places_.size(),
-      "The number of output should equal to the number of places.");
+      platform::errors::InvalidArgument(
          "The number of inputs should equal to the number of places, but got "
          "the number of inputs is %d and the number of places is %d.",
          in_var_handles.size(), places_.size()));
  VarHandle *out_var_handle;
  {
    auto out_var_handles = DynamicCast<VarHandle>(outputs_);
    PADDLE_ENFORCE_EQ(out_var_handles.size(), 1UL,
-                      "The number of output should be one.");
+                      platform::errors::InvalidArgument(
                          "The number of output should be one, but got %d.",
                          out_var_handles.size()));
    out_var_handle = out_var_handles.front();
  }
@ -168,7 +182,10 @@ void ReduceOpHandle::RunImpl() {
  auto pre_in_var =
      var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name());
-  PADDLE_ENFORCE_NOT_NULL(pre_in_var);
+
  PADDLE_ENFORCE_NOT_NULL(pre_in_var, platform::errors::NotFound(
                                          "Variable %s is not found in scope.",
                                          in_0_handle->name()));
  // NOTE: The Places of all input tensor must be all on CPU or all on GPU.
  std::vector<platform::Place> in_places;  // used to get dev_ctx
@ -176,21 +193,29 @@ void ReduceOpHandle::RunImpl() {
    in_places.emplace_back(in_handle->place());
    auto in_var =
        var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name());
-    PADDLE_ENFORCE_NOT_NULL(in_var);
+
    PADDLE_ENFORCE_NOT_NULL(
        in_var, platform::errors::NotFound("Variable %s is not found in scope.",
                                           in_handle->name()));
    VariableVisitor::EnforceShapeAndDTypeEQ(*pre_in_var, *in_var);
  }
  auto out_var = var_scopes.at(out_var_handle->scope_idx())
                     ->FindVar(out_var_handle->name());
-  PADDLE_ENFORCE_NOT_NULL(out_var);
+
  PADDLE_ENFORCE_NOT_NULL(
      out_var, platform::errors::NotFound("Variable %s is not found in scope.",
                                          out_var_handle->name()));
  // NOTE: The tensors' Place of input and output must be all on GPU or all on
  // CPU.
  auto in_p = VariableVisitor::GetMutableTensor(pre_in_var).place();
  platform::Place t_out_p;
  if (platform::is_gpu_place(in_p)) {
-    PADDLE_ENFORCE(platform::is_gpu_place(out_var_handle->place()),
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_var_handle->place()), true,
-                   "Places of input and output must be all on GPU.");
+                      platform::errors::PreconditionNotMet(
                          "Places of input and output must be all on GPU."));
    t_out_p = out_var_handle->place();
  } else {
    t_out_p = platform::CPUPlace();
@ -229,7 +254,10 @@ void ReduceOpHandle::RunImpl() {
            in_selected_rows, in_places, dev_ctxes_, out_var_handle, t_out_p,
            out_var->GetMutable<framework::SelectedRows>());
      } else {
-        PADDLE_THROW("only support double or float when gather SelectedRows");
+        PADDLE_THROW(platform::errors::Unimplemented(
            "Only support double or float when gather SelectedRows, but got "
            "%s.",
            framework::DataTypeToString(in_selected_rows[0]->value().type())));
      }
 #endif
    });
@ -292,7 +320,7 @@ void ReduceOpHandle::RunImpl() {
        size_t numel = static_cast<size_t>(lod_tensor.numel());
        all_reduce_calls.emplace_back(
            [buffer, recvbuffer, type, numel, root_id, &nccl_ctx] {
-              PADDLE_ENFORCE(platform::dynload::ncclReduce(
+              PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce(
                  buffer, recvbuffer, numel, static_cast<ncclDataType_t>(type),
                  ncclSum, root_id, nccl_ctx.comm_, nccl_ctx.stream()));
            });
@ -306,10 +334,13 @@ void ReduceOpHandle::RunImpl() {
        }
      });
 #else
-      PADDLE_THROW("CUDA is not enabled.");
+      PADDLE_THROW(
          platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
    } else {
-      PADDLE_THROW("Place should be CPUPlace or CUDAPlace.");
+      PADDLE_THROW(platform::errors::InvalidArgument(
          "The place of tensor should be CPUPlace or CUDAPlace, but got %s.",
          lod_tensors[0]->place()));
    }
  }
 }
--- a/paddle/fluid/framework/details/reduce_op_handle_test.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc
@ -13,7 +13,9 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/reduce_op_handle.h"
 #include <unordered_map>
 #include "gtest/gtest.h"
 #include "paddle/fluid/platform/device_context.h"
@ -69,7 +71,8 @@ struct TestReduceOpHandle {
      }
      nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
          platform::errors::PreconditionNotMet("Not compiled with NCLL."));
 #endif
    } else {
      int count = 8;
@ -103,7 +106,8 @@ struct TestReduceOpHandle {
      op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
                                          gpu_list_, nccl_ctxs_.get()));
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
          platform::errors::PreconditionNotMet("Not compiled with NCLL."));
 #endif
    } else {
 #if defined(PADDLE_WITH_NCCL)
@ -164,7 +168,10 @@ struct TestReduceOpHandle {
    for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
         ++input_scope_idx) {
      auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-      PADDLE_ENFORCE_NOT_NULL(in_var);
+
      PADDLE_ENFORCE_NOT_NULL(
          in_var, platform::errors::NotFound(
                      "Variable %s is not found in scope.", "input"));
      auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
      auto value = in_selected_rows->mutable_value();
      value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
@ -178,7 +185,9 @@ struct TestReduceOpHandle {
    }
    auto out_var = param_scopes_[output_scope_idx]->FindVar("out");
-    PADDLE_ENFORCE_NOT_NULL(out_var);
+    PADDLE_ENFORCE_NOT_NULL(out_var,
                            platform::errors::NotFound(
                                "Variable %s is not found in scope.", "out"));
    auto out_selected_rows = out_var->GetMutable<f::SelectedRows>();
    auto in_var = param_scopes_[output_scope_idx]->FindVar("input");
@ -196,9 +205,18 @@ struct TestReduceOpHandle {
    auto &out_select_rows = out_var->Get<f::SelectedRows>();
    auto rt = out_select_rows.value();
-    PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal.");
+    PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
                      platform::errors::InvalidArgument(
                          "The height of SelectedRows is not equal to "
                          "the expected, expect %d, but got %d.",
                          height, out_select_rows.height()));
    for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
-      PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]);
+      PADDLE_ENFORCE_EQ(
          out_select_rows.rows()[k], rows[k % rows.size()],
          platform::errors::InvalidArgument(
              "The item at position %d of rows of SelectedRows is not equal to "
              "the expected, expect %d, but got %d.",
              k, rows[k % rows.size()], out_select_rows.rows()[k]));
    }
    f::Tensor result_tensor;
@ -208,7 +226,7 @@ struct TestReduceOpHandle {
    for (int64_t j = 0; j < f::product(result_tensor.dims()); ++j) {
      ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5);
    }
-  }
+  }  // namespace details
  void TestReduceLodTensors(size_t output_scope_idx) {
    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
@ -220,7 +238,9 @@ struct TestReduceOpHandle {
    for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
         ++input_scope_idx) {
      auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-      PADDLE_ENFORCE_NOT_NULL(in_var);
+      PADDLE_ENFORCE_NOT_NULL(
          in_var, platform::errors::NotFound(
                      "Variable %s is not found in scope.", "input"));
      auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
      in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
      in_lod_tensor->set_lod(lod);
@ -230,7 +250,9 @@ struct TestReduceOpHandle {
    }
    auto out_var = param_scopes_[output_scope_idx]->FindVar("out");
-    PADDLE_ENFORCE_NOT_NULL(out_var);
+    PADDLE_ENFORCE_NOT_NULL(out_var,
                            platform::errors::NotFound(
                                "Variable %s is not found in scope.", "out"));
    auto out_lodtensor = out_var->GetMutable<f::LoDTensor>();
    auto in_var = param_scopes_[output_scope_idx]->FindVar("input");
@ -254,7 +276,7 @@ struct TestReduceOpHandle {
      ASSERT_NEAR(ct[j], send_vector[j] * gpu_list_.size(), 1e-5);
    }
  }
-};
+};  // namespace details
 TEST(ReduceTester, TestCPUReduceTestSelectedRows) {
  TestReduceOpHandle test_op;
--- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
+++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
@ -111,13 +111,12 @@ void ShareTensorBufferFunctor::CallOnce() {
    auto *out_var = exec_scope_->FindVar(out_var_names_[i]);
    PADDLE_ENFORCE_NOT_NULL(
        in_var, platform::errors::NotFound(
-                    "The input variable(%s)to be inplaced should not be NULL.",
+                    "The variable(%s) to be inplaced is not found in scope.",
                    in_var_infos_[i]->Name()));
    PADDLE_ENFORCE_NOT_NULL(
-        out_var,
+        out_var, platform::errors::NotFound(
-        platform::errors::NotFound(
+                     "The variable(%s) to be inplaced is not found in scope.",
-            "The output variable(%s) to be inplaced should not be NULL.",
+                     out_var_names_[i]));
            out_var_names_[i]));
    PADDLE_ENFORCE_NE(
        in_var, out_var,
        platform::errors::PreconditionNotMet(
--- a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h"
 #include <algorithm>
 #include <utility>
 #include "dgc/dgc.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
@ -38,18 +40,23 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle(
      is_encoded_(is_encoded),
      nranks_(nranks) {
  // TODO(gongwb) :polish them!
-  PADDLE_ENFORCE_EQ(is_encoded, true);
+  PADDLE_ENFORCE_EQ(is_encoded, true, platform::errors::InvalidArgument(
                                          "The argument is_encoded is false."));
  VLOG(1) << "Use dgc allreduce mode"
          << ", nranks:" << nranks_;
-  PADDLE_ENFORCE_GT(local_scopes_.size(), 0);
+  PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
                    platform::errors::PreconditionNotMet(
                        "The number of local scope should be > 0, but got %zu.",
                        local_scopes_.size()));
  auto nranks_name = g_dgc_nranks;
  for (size_t i = 0; i < local_scopes_.size(); ++i) {
    auto *local_scope = local_scopes_[i];
    auto nranks_var = local_scope->FindVar(nranks_name);
-    if (nranks_var == nullptr) {
+
-      PADDLE_THROW("not find nranks_var:%s", nranks_name);
+    PADDLE_ENFORCE_NOT_NULL(
-    }
+        nranks_var, platform::errors::NotFound(
                        "Variable %s is not found in scope.", nranks_name));
    float *dgc_nranks = nranks_var->GetMutable<LoDTensor>()->data<float>();
    *dgc_nranks = nranks;
@ -64,10 +71,18 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
  auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), places_.size(),
-      "The NoDummyInputSize should be equal to the number of places.");
+      platform::errors::PreconditionNotMet(
          "The number of input variables should be equal to the number of "
          "places, but got the number of input variables is %zu and the the "
          "number of places is %zu.",
          in_var_handles.size(), places_.size()));
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), out_var_handles.size(),
-      "The NoDummyInputSize and NoDummyOutputSize should be equal.");
+      platform::errors::PreconditionNotMet(
          "The number of input variables should be equal to the number of "
          "output variables, but got the number of input variables is %zu and "
          "the the number of output variables is %zu.",
          in_var_handles.size(), out_var_handles.size()));
  std::vector<const LoDTensor *> ins;
  std::vector<LoDTensor *> gathers;
@ -80,14 +95,17 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
    auto encode_var_name = original_name + g_dgc_encoded;
    auto *in_var = local_scope->FindVar(encode_var_name);
-    PADDLE_ENFORCE_NOT_NULL(in_var, "%s should not be null", encode_var_name);
+    PADDLE_ENFORCE_NOT_NULL(
        in_var, platform::errors::NotFound("Variable %s is not found in scope.",
                                           encode_var_name));
    auto &in = in_var->Get<LoDTensor>();
    ins.emplace_back(&in);
    auto gather_var_name = original_name + g_dgc_gather;
    auto *gather_var = local_scope->FindVar(gather_var_name);
-    PADDLE_ENFORCE_NOT_NULL(gather_var, "%s should not be null",
+    PADDLE_ENFORCE_NOT_NULL(
-                            gather_var_name);
+        gather_var, platform::errors::NotFound(
                        "Variable %s is not found in scope.", gather_var));
    auto *gather = gather_var->GetMutable<LoDTensor>();
    gathers.emplace_back(gather);
@ -100,14 +118,26 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
    }
  }
-  PADDLE_ENFORCE(platform::is_gpu_place(ins[0]->place()));
+  PADDLE_ENFORCE_EQ(
-  PADDLE_ENFORCE(platform::is_gpu_place(outs[0]->place()));
+      platform::is_gpu_place(ins[0]->place()), true,
-  PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr.");
+      platform::errors::InvalidArgument(
          "The place of input variable should be CUDAPlace, but got %s.",
          ins[0]->place()));
  PADDLE_ENFORCE_EQ(
      platform::is_gpu_place(outs[0]->place()), true,
      platform::errors::InvalidArgument(
          "The place of input variable should be CUDAPlace, but got %s.",
          outs[0]->place()));
  PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, platform::errors::PreconditionNotMet(
                                          "The nccl contexts are NULL."));
  int dtype = -1;
  size_t in_numel = 0;
  size_t out_numel = 0;
-  PADDLE_ENFORCE(nranks_ > 1);
+  PADDLE_ENFORCE_GT(
      nranks_, 1,
      platform::errors::PreconditionNotMet(
          "The number of ranks should be > 1, but got %d.", nranks_));
  std::vector<std::function<void()>> all_gather_calls;
  std::vector<std::function<void()>> sparse_reduce_calls;
@ -123,8 +153,16 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
    dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype;
    in_numel = (in_numel == 0) ? static_cast<size_t>(in.numel()) : in_numel;
-    PADDLE_ENFORCE(in_numel % 2 == 0);
+    PADDLE_ENFORCE_EQ(in_numel % 2, 0,
-    PADDLE_ENFORCE(in_numel / 2 == static_cast<size_t>(k));
+                      platform::errors::InvalidArgument(
                          "The number of elements of input variable should be "
                          "even, but got %zu.",
                          in_numel));
    PADDLE_ENFORCE_EQ(in_numel / 2, static_cast<size_t>(k),
                      platform::errors::InvalidArgument(
                          "The number of elements of input variable should be "
                          "even, but got %zu.",
                          in_numel));
    out_numel = (out_numel == 0) ? static_cast<size_t>(out.numel()) : out_numel;
    int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
@ -154,7 +192,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
      PADDLE_ENFORCE_EQ(paddle::communication::dgc::sparseReduce(
                            gather_buff, k, out_tensor_buf,
                            static_cast<int>(out_numel), nranks_, stream),
-                        true);
+                        true, platform::errors::Unavailable(
                                  "Calling sparseReduce() failed."));
    });
  }
@ -187,11 +226,16 @@ void SparseAllReduceOpHandle::SparseAllReduceFunc(
 int SparseAllReduceOpHandle::GetKValue(const std::string &grad_name) {
  auto original_name = paddle::framework::GradOriginalVarName(grad_name);
  auto var_name = original_name + g_dgc_k;
-  PADDLE_ENFORCE(local_scopes_.size() > 0);
+  PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
                    platform::errors::PreconditionNotMet(
                        "The number of local scope should be > 0, but got %zu.",
                        local_scopes_.size()));
  auto *scope = local_exec_scopes_[0];
  auto var = scope->FindVar(var_name);
-  PADDLE_ENFORCE_NOT_NULL(var);
+  PADDLE_ENFORCE_NOT_NULL(
      var, platform::errors::NotFound("Variable %s is not found in scope.",
                                      var_name));
  auto tensor = var->Get<LoDTensor>().data<float>();
  return *tensor;
 }
@ -202,15 +246,22 @@ bool SparseAllReduceOpHandle::IsEncoded() {
  }
  auto counter_name = g_dgc_counter_name;
  auto step_name = g_dgc_rampup_begin_step;
-  PADDLE_ENFORCE(local_scopes_.size() > 0);
+
  PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
                    platform::errors::PreconditionNotMet(
                        "The number of local scope should be > 0, but got %zu.",
                        local_scopes_.size()));
  auto *local_scope = local_exec_scopes_[0];
  auto count_var = local_scope->FindVar(counter_name);
  auto step_var = local_scope->FindVar(step_name);
-  if (count_var == nullptr || step_var == nullptr) {
+
-    PADDLE_THROW("not find count_var:%s or step_var:%s", counter_name,
+  PADDLE_ENFORCE_NOT_NULL(
-                 step_var);
+      count_var, platform::errors::NotFound(
-  }
+                     "Variable %s is not found in scope.", counter_name));
  PADDLE_ENFORCE_NOT_NULL(
      step_var, platform::errors::NotFound("Variable %s is not found in scope.",
                                           step_var));
  float count = *count_var->Get<LoDTensor>().data<float>();
  float step = *step_var->Get<LoDTensor>().data<float>();