bugfix lod cpu performance (#12297)

7 years ago · 02cf54d331
parent b41f8b9d42
commit 02cf54d331
4 changed files with 99 additions and 35 deletions
--- a/paddle/fluid/framework/mixed_vector.h
+++ b/paddle/fluid/framework/mixed_vector.h
--- a/paddle/fluid/operators/adam_op.h
+++ b/paddle/fluid/operators/adam_op.h
@@ -293,11 +293,18 @@ class AdamOpKernel : public framework::OpKernel<T> {
      auto& grad_tensor = grad_merge.value();
      const T* grad_data = grad_tensor.template data<T>();
      int64_t* rows = nullptr;
 // When compiled without CUDA, the CUDAMutableData() interface should not be
 // provided.
 #if defined(PADDLE_WITH_CUDA)
      if (platform::is_gpu_place(ctx.GetPlace())) {
        rows = grad_merge.mutable_rows()->CUDAMutableData(ctx.GetPlace());
      } else {
 #endif
        rows = grad_merge.mutable_rows()->data();
 #if defined(PADDLE_WITH_CUDA)
      }
 #endif
      auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
      SparseAdamFunctor<T> functor(
--- a/paddle/fluid/operators/detection/target_assign_op.h
+++ b/paddle/fluid/operators/detection/target_assign_op.h
@@ -106,7 +106,11 @@ class TargetAssignKernel : public framework::OpKernel<T> {
    int64_t k = x->dims()[2];
    auto x_lod = x->lod().back();
 #if defined(PADDLE_WITH_CUDA)
    size_t* x_lod_data = x_lod.MutableData(ctx.GetPlace());
 #else
    size_t* x_lod_data = x_lod.data();
 #endif
    TargetAssignFunctor<T, WT> functor(x_data, match_idx_data, x_lod_data,
                                       mismatch_value, n, m, p, k, out_data,
@@ -121,7 +125,11 @@ class TargetAssignKernel : public framework::OpKernel<T> {
      PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL);
      const int* neg_idx_data = neg_indices->data<int>();
      auto neg_lod = neg_indices->lod().back();
 #if defined(PADDLE_WITH_CUDA)
      size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
 #else
      size_t* neg_lod_data = neg_lod.data();
 #endif
      NegTargetAssignFunctor<DeviceContext, T, WT> neg_trg_functor;
      neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, n, m, k,
                      mismatch_value, out_data, out_wt_data);
--- a/paddle/fluid/operators/math/sequence2batch.h
+++ b/paddle/fluid/operators/math/sequence2batch.h
@@ -78,7 +78,7 @@ class LoDTensor2BatchFunctor {
    auto lods = lod_tensor.lod();
    PADDLE_ENFORCE_EQ(lods.size(), 1UL, "Only support one level sequence now.");
-    auto lod = lods[0];
+    const auto& lod = lods[0];
    std::vector<SeqInfo> seq_info;
    for (size_t seq_id = 0; seq_id < lod.size() - 1; ++seq_id) {