Rename to strided_memcpy and prettify unit tests

Add unittests for Crop and Concat
8 years ago · 3fb0b6e67b
parent bda67d9d4b
commit 3fb0b6e67b
5 changed files with 181 additions and 96 deletions
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@ -96,4 +96,4 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
-cc_test(tensor_copy_test SRCS tensor_copy_test.cc DEPS tensor paddle_memory)
+cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
--- a/paddle/operators/detail/strided_memcpy.h
+++ b/paddle/operators/detail/strided_memcpy.h
@ -22,10 +22,10 @@ namespace operators {
 namespace detail {
 template <typename T, int Rank>
-struct TensorCopyFunctor;
+struct StridedMemcpyFunctor;
 template <typename T>
-struct TensorCopyFunctor<T, 1> {
+struct StridedMemcpyFunctor<T, 1> {
  void operator()(const platform::DeviceContext& dev_ctx, const T* src,
                  framework::Dim<1> src_stride, framework::Dim<1> dst_dim,
                  framework::Dim<1> dst_stride, T* dst) const {
@ -48,12 +48,12 @@ struct TensorCopyFunctor<T, 1> {
 };
 template <typename T, int Rank>
-struct TensorCopyFunctor {
+struct StridedMemcpyFunctor {
  void operator()(const platform::DeviceContext& dev_ctx, const T* src,
                  framework::Dim<Rank> src_stride, framework::Dim<Rank> dst_dim,
                  framework::Dim<Rank> dst_stride, T* dst) const {
    for (int64_t i = 0; i < dst_dim.head; ++i) {
-      TensorCopyFunctor<T, Rank - 1> func;
+      StridedMemcpyFunctor<T, Rank - 1> func;
      func(dev_ctx, src, src_stride.tail, dst_dim.tail, dst_stride.tail, dst);
      src += src_stride.head;
      dst += dst_stride.head;
@ -62,10 +62,10 @@ struct TensorCopyFunctor {
 };
 template <typename T>
-struct TensorCopyDimVisitor : public boost::static_visitor<void> {
+struct StridedCopyDimVisitor : public boost::static_visitor<void> {
-  TensorCopyDimVisitor(const platform::DeviceContext& dev_ctx, const T* src,
+  StridedCopyDimVisitor(const platform::DeviceContext& dev_ctx, const T* src,
-                       const framework::DDim& src_stride,
+                        const framework::DDim& src_stride,
-                       const framework::DDim& dst_stride, T* dst)
+                        const framework::DDim& dst_stride, T* dst)
      : dev_ctx_(dev_ctx),
        src_(src),
        src_stride_(src_stride),
@ -77,7 +77,7 @@ struct TensorCopyDimVisitor : public boost::static_visitor<void> {
    Dim src_stride = boost::get<Dim>(src_stride_);
    Dim dst_stride = boost::get<Dim>(dst_stride_);
    constexpr int dim = Dim::dimensions;
-    TensorCopyFunctor<T, dim> functor;
+    StridedMemcpyFunctor<T, dim> functor;
    functor(dev_ctx_, src_, src_stride, dst_dim, dst_stride, dst_);
  }
--- a/paddle/operators/strided_memcpy.h
+++ b/paddle/operators/strided_memcpy.h
@ -13,15 +13,17 @@
   limitations under the License. */
 #pragma once
-#include "paddle/operators/detail/tensor_copy.h"
+#include "paddle/operators/detail/strided_memcpy.h"
 namespace paddle {
 namespace operators {
-// Copy a tensor from src to dst.
+// Strided memory copy from src to dst.
 // The src and dst should be both on dev_ctx.GetPlace()
 //
-// the stride of an array (also referred to as increment, pitch or step size) is
+// Both src and dst must reside on dev_ctx.GetPlace(); otherwise there will
 // be a segmentation fault.
 //
 // The stride of an array (also referred to as increment, pitch or step size) is
 // the number of locations in memory between beginnings of successive array
 // elements
 //
@ -31,12 +33,12 @@ namespace operators {
 // NOTE: When use GPU, the memcpy is async. To sync memcpy, please invoke
 // `dev_ctx.Wait()`.
 template <typename T>
-inline void TensorCopy(const platform::DeviceContext& dev_ctx, const T* src,
+inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
-                       const framework::DDim& src_stride,
+                          const framework::DDim& src_stride,
-                       const framework::DDim& dst_dim,
+                          const framework::DDim& dst_dim,
-                       const framework::DDim& dst_stride, T* dst) {
+                          const framework::DDim& dst_stride, T* dst) {
  using namespace detail;
  // The visitor is built with every argument except dst_dim; applying it to
  // dst_dim hands the visitor the concrete dimension type held by dst_dim, and
  // the visitor then runs the rank-specific copy functor.
  // NOTE(review): src_stride and dst_stride are presumably expected to have
  // the same rank as dst_dim -- confirm against the visitor implementation.
-  TensorCopyDimVisitor<T> func(dev_ctx, src, src_stride, dst_stride, dst);
+  StridedCopyDimVisitor<T> func(dev_ctx, src, src_stride, dst_stride, dst);
  boost::apply_visitor(func, dst_dim);
 }
 }  // namespace operators
--- a/paddle/operators/strided_memcpy_test.cc
+++ b/paddle/operators/strided_memcpy_test.cc
@ -0,0 +1,160 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #include "paddle/operators/strided_memcpy.h"
 #include "gtest/gtest.h"
 #include "paddle/memory/memory.h"
 namespace paddle {
 namespace operators {
 // Crops the 2x2 window {1, 2; 3, 4} out of a 3x5 row-major host matrix.
 TEST(StridedMemcpy, CPUCrop) {
  // clang-format off
  int matrix[] = {
      0, 1, 2, 0, 0,
      0, 3, 4, 0, 0,
      0, 0, 0, 0, 0,
  };
  // clang-format on

  // Start one element in, so the copy begins at the cell holding 1.
  const int* window_begin = matrix + 1;

  // Source rows are 5 ints apart; the cropped rows are packed 2 ints apart.
  framework::DDim matrix_stride({5, 1});
  framework::DDim window_dim({2, 2});
  framework::DDim window_stride({2, 1});

  int dst[4];
  platform::CPUDeviceContext cpu_ctx;
  StridedMemcpy<int>(cpu_ctx, window_begin, matrix_stride, window_dim,
                     window_stride, dst);

  // The window must arrive in row-major order.
  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
 }
 // Writes the same 2x2 host block twice, side by side, into a 2x4 buffer.
 TEST(StridedMemcpy, CPUConcat) {
  // clang-format off
  int block[] = {
      1, 2,
      3, 4
  };
  int expect_dst[] = {
      1, 2, 1, 2,
      3, 4, 3, 4
  };
  // clang-format on

  framework::DDim block_stride({2, 1});
  framework::DDim block_dim({2, 2});
  // Destination rows are 4 ints wide, so each copied row lands 4 ints apart.
  framework::DDim wide_stride({4, 1});

  int dst[8];
  int* left_half = dst;
  int* right_half = dst + 2;

  platform::CPUDeviceContext cpu_ctx;
  StridedMemcpy<int>(cpu_ctx, block, block_stride, block_dim, wide_stride,
                     left_half);
  StridedMemcpy<int>(cpu_ctx, block, block_stride, block_dim, wide_stride,
                     right_half);

  // Every cell of the 2x4 destination must match the expected layout.
  const size_t cell_count = sizeof(expect_dst) / sizeof(int);
  for (size_t i = 0; i < cell_count; ++i) {
    ASSERT_EQ(expect_dst[i], dst[i]);
  }
 }
 #ifndef PADDLE_ONLY_CPU
 // Crops the 2x2 window {1, 2; 3, 4} out of a 3x5 row-major matrix that lives
 // in GPU memory, then reads the result back to the host for checking.
 TEST(StridedMemcpy, GPUCrop) {
  // clang-format off
  int src[] = {
      0, 1, 2, 0, 0,
      0, 3, 4, 0, 0,
      0, 0, 0, 0, 0,
  };
  // clang-format on
  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  // Stage the source matrix on the device.
  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  // Source rows are 5 ints apart; the cropped rows are packed 2 ints apart.
  framework::DDim src_stride({5, 1});
  int dst[4];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});

  platform::CUDADeviceContext ctx(gpu0);
  // gpu_src + 1 skips the first column so the copy starts at the cell holding 1.
  StridedMemcpy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride,
                     gpu_dst);

  // The strided copy and the read-back are both issued through ctx; wait on it
  // so that dst is fully populated before it is inspected.
  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst), ctx.stream());
  ctx.Wait();

  // Release the device buffers before asserting: a failing ASSERT_* returns
  // from the test body immediately and would otherwise skip these frees.
  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);

  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
 }
 // Writes the same 2x2 block twice, side by side, into a 2x4 buffer in GPU
 // memory, then reads the result back to the host for checking.
 TEST(StridedMemcpy, GPUConcat) {
  // clang-format off
  int src[] = {
      1, 2,
      3, 4
  };
  // clang-format on
  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  // Stage the source block on the device.
  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  int dst[8];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  framework::DDim src_stride({2, 1});
  framework::DDim dst_dim({2, 2});
  // Destination rows are 4 ints wide, so each copied row lands 4 ints apart.
  framework::DDim dst_stride({4, 1});

  platform::CUDADeviceContext ctx(gpu0);

  // First copy fills columns 0-1, second copy (offset by 2) fills columns 2-3.
  StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride, gpu_dst);
  StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride,
                     gpu_dst + 2);

  // The strided copies and the read-back are all issued through ctx; wait on
  // it so that dst is fully populated before it is inspected.
  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst), ctx.stream());
  ctx.Wait();

  // Release the device buffers before asserting: a failing ASSERT_* returns
  // from the test body immediately and would otherwise skip these frees.
  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);

  // clang-format off
  int expect_dst[] = {
      1, 2, 1, 2,
      3, 4, 3, 4
  };
  // clang-format on
  for (size_t i = 0; i < sizeof(expect_dst) / sizeof(int); ++i) {
    ASSERT_EQ(expect_dst[i], dst[i]);
  }
 }
 #endif
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/operators/tensor_copy_test.cc
+++ b/paddle/operators/tensor_copy_test.cc
@ -1,77 +0,0 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #include "paddle/operators/tensor_copy.h"
 #include "gtest/gtest.h"
 #include "paddle/memory/memory.h"
 namespace paddle {
 namespace operators {
 // Copies a 2x2 region out of a 15-element host array (read with a row stride
 // of 5) into a packed 4-element host array and checks the four values.
 TEST(TensorCopy, CPU_COPY) {
  int src[] = {
      0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
  };
  // Rows of the source are 5 ints apart, columns 1 int apart.
  framework::DDim src_stride({5, 1});
  int dst[4];
  framework::DDim dst_dim({2, 2});
  // The destination is packed: rows are 2 ints apart.
  framework::DDim dst_stride({2, 1});
  platform::CPUDeviceContext ctx;
  // src + 1 starts the copy at the element holding 1.
  TensorCopy<int>(ctx, src + 1, src_stride, dst_dim, dst_stride, dst);
  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
 }
 #ifndef PADDLE_ONLY_CPU
 // Same 2x2 region copy as the CPU test, but source and destination buffers
 // are allocated on GPU 0 and the result is copied back to the host to check.
 TEST(TensorCopy, GPU_COPY) {
  int src[] = {
      0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
  };
  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;
  // Stage the source array on the device.
  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));
  // Rows of the source are 5 ints apart, columns 1 int apart.
  framework::DDim src_stride({5, 1});
  int dst[4];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));
  framework::DDim dst_dim({2, 2});
  // The destination is packed: rows are 2 ints apart.
  framework::DDim dst_stride({2, 1});
  platform::CUDADeviceContext ctx(gpu0);
  // gpu_src + 1 starts the copy at the element holding 1.
  TensorCopy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride, gpu_dst);
  // NOTE(review): this read-back passes no stream and is not followed by
  // ctx.Wait(); whether dst is guaranteed complete before the assertions
  // depends on memory::Copy's behavior, which is not visible here -- confirm.
  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst));
  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
  // NOTE(review): a failing ASSERT_EQ above returns before these frees run.
  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);
 }
 #endif
 }  // namespace operators
 }  // namespace paddle