A method to copy a tensor with stride and dimension. It is useful for Crop, Concat, etc. (branch: update-doc-pybind)
parent
5b42d2b21b
commit
3a4897ab15
@ -0,0 +1,93 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "paddle/framework/ddim.h"
|
||||||
|
#include "paddle/memory/memcpy.h"
|
||||||
|
#include "paddle/platform/device_context.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
template <typename T, int Rank>
|
||||||
|
struct TensorCopyFunctor;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct TensorCopyFunctor<T, 1> {
|
||||||
|
void operator()(const platform::DeviceContext& dev_ctx, const T* src,
|
||||||
|
framework::Dim<1> src_stride, framework::Dim<1> dst_dim,
|
||||||
|
framework::Dim<1> dst_stride, T* dst) const {
|
||||||
|
auto place = dev_ctx.GetPlace();
|
||||||
|
if (platform::is_cpu_place(place)) {
|
||||||
|
auto& cpu_place = boost::get<platform::CPUPlace>(place);
|
||||||
|
memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head);
|
||||||
|
} else {
|
||||||
|
#ifndef PADDLE_ONLY_CPU
|
||||||
|
auto& gpu_place = boost::get<platform::GPUPlace>(place);
|
||||||
|
auto& cuda_ctx =
|
||||||
|
reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx);
|
||||||
|
memory::Copy(gpu_place, dst, gpu_place, src, sizeof(T) * dst_dim.head,
|
||||||
|
cuda_ctx.stream());
|
||||||
|
#else
|
||||||
|
PADDLE_THROW("Paddle is not compiled with GPU");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, int Rank>
|
||||||
|
struct TensorCopyFunctor {
|
||||||
|
void operator()(const platform::DeviceContext& dev_ctx, const T* src,
|
||||||
|
framework::Dim<Rank> src_stride, framework::Dim<Rank> dst_dim,
|
||||||
|
framework::Dim<Rank> dst_stride, T* dst) const {
|
||||||
|
for (int64_t i = 0; i < dst_dim.head; ++i) {
|
||||||
|
TensorCopyFunctor<T, Rank - 1> func;
|
||||||
|
func(dev_ctx, src, src_stride.tail, dst_dim.tail, dst_stride.tail, dst);
|
||||||
|
src += src_stride.head;
|
||||||
|
dst += dst_stride.head;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct TensorCopyDimVisitor : public boost::static_visitor<void> {
|
||||||
|
TensorCopyDimVisitor(const platform::DeviceContext& dev_ctx, const T* src,
|
||||||
|
const framework::DDim& src_stride,
|
||||||
|
const framework::DDim& dst_stride, T* dst)
|
||||||
|
: dev_ctx_(dev_ctx),
|
||||||
|
src_(src),
|
||||||
|
src_stride_(src_stride),
|
||||||
|
dst_stride_(dst_stride),
|
||||||
|
dst_(dst) {}
|
||||||
|
|
||||||
|
template <typename Dim>
|
||||||
|
void operator()(Dim dst_dim) const {
|
||||||
|
Dim src_stride = boost::get<Dim>(src_stride_);
|
||||||
|
Dim dst_stride = boost::get<Dim>(dst_stride_);
|
||||||
|
constexpr int dim = Dim::dimensions;
|
||||||
|
TensorCopyFunctor<T, dim> functor;
|
||||||
|
functor(dev_ctx_, src_, src_stride, dst_dim, dst_stride, dst_);
|
||||||
|
}
|
||||||
|
|
||||||
|
const platform::DeviceContext& dev_ctx_;
|
||||||
|
const T* src_;
|
||||||
|
const framework::DDim& src_stride_;
|
||||||
|
const framework::DDim& dst_stride_;
|
||||||
|
T* dst_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
@ -0,0 +1,43 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "paddle/operators/detail/tensor_copy.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
|
||||||
|
// Copy a tensor from src to dst.
|
||||||
|
// The src and dst should be both on dev_ctx.GetPlace()
|
||||||
|
//
|
||||||
|
// the stride of an array (also referred to as increment, pitch or step size) is
|
||||||
|
// the number of locations in memory between beginnings of successive array
|
||||||
|
// elements
|
||||||
|
//
|
||||||
|
// For example, for tensor like [1, 3, 300, 300]. If there is no padding, the
|
||||||
|
// stride is [270000, 90000, 300, 1].
|
||||||
|
//
|
||||||
|
// NOTE: When use GPU, the memcpy is async. To sync memcpy, please invoke
|
||||||
|
// `dev_ctx.Wait()`.
|
||||||
|
template <typename T>
|
||||||
|
inline void TensorCopy(const platform::DeviceContext& dev_ctx, const T* src,
|
||||||
|
const framework::DDim& src_stride,
|
||||||
|
const framework::DDim& dst_dim,
|
||||||
|
const framework::DDim& dst_stride, T* dst) {
|
||||||
|
using namespace detail;
|
||||||
|
TensorCopyDimVisitor<T> func(dev_ctx, src, src_stride, dst_stride, dst);
|
||||||
|
boost::apply_visitor(func, dst_dim);
|
||||||
|
}
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
@ -0,0 +1,77 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include "paddle/operators/tensor_copy.h"
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "paddle/memory/memory.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
// Extract a strided 2x2 sub-block (values 1..4) out of a 3x5 source buffer
// and verify it lands contiguously in dst.
TEST(TensorCopy, CPU_COPY) {
  int src[] = {
      0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
  };

  framework::DDim src_stride({5, 1});

  int dst[4];
  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});

  platform::CPUDeviceContext ctx;
  // src + 1 skips the leading 0 so the copy starts at value 1.
  TensorCopy<int>(ctx, src + 1, src_stride, dst_dim, dst_stride, dst);

  for (int i = 0; i < 4; ++i) {
    ASSERT_EQ(i + 1, dst[i]);
  }
}
|
||||||
|
|
||||||
|
#ifndef PADDLE_ONLY_CPU
|
||||||
|
TEST(TensorCopy, GPU_COPY) {
|
||||||
|
int src[] = {
|
||||||
|
0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
platform::GPUPlace gpu0(0);
|
||||||
|
platform::CPUPlace cpu;
|
||||||
|
|
||||||
|
int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
|
||||||
|
memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));
|
||||||
|
|
||||||
|
framework::DDim src_stride({5, 1});
|
||||||
|
|
||||||
|
int dst[4];
|
||||||
|
int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));
|
||||||
|
|
||||||
|
framework::DDim dst_dim({2, 2});
|
||||||
|
framework::DDim dst_stride({2, 1});
|
||||||
|
|
||||||
|
platform::CUDADeviceContext ctx(gpu0);
|
||||||
|
TensorCopy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride, gpu_dst);
|
||||||
|
|
||||||
|
memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst));
|
||||||
|
|
||||||
|
ASSERT_EQ(1, dst[0]);
|
||||||
|
ASSERT_EQ(2, dst[1]);
|
||||||
|
ASSERT_EQ(3, dst[2]);
|
||||||
|
ASSERT_EQ(4, dst[3]);
|
||||||
|
|
||||||
|
memory::Free(gpu0, gpu_dst);
|
||||||
|
memory::Free(gpu0, gpu_src);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
Loading…
Reference in new issue