Add unittests for Crop and Concat

branch: update-doc-pybind
parent: bda67d9d4b
commit: 3fb0b6e67b
@@ -0,0 +1,160 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/strided_memcpy.h"
#include "gtest/gtest.h"
#include "paddle/memory/memory.h"

namespace paddle {
namespace operators {

TEST(StridedMemcpy, CPUCrop) {
  // clang-format off
  int src[] = {
      0, 1, 2, 0, 0,
      0, 3, 4, 0, 0,
      0, 0, 0, 0, 0,
  };
  // clang-format on
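
  // src holds a 3x5 row-major matrix (row stride 5); the copy below crops the
  // 2x2 block {{1, 2}, {3, 4}} that starts one element past the beginning.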
  framework::DDim src_stride({5, 1});

  int dst[4];
  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});

  platform::CPUDeviceContext ctx;
  StridedMemcpy<int>(ctx, src + 1, src_stride, dst_dim, dst_stride, dst);

  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
}

TEST(StridedMemcpy, CPUConcat) {
  // clang-format off
  int src[] = {
      1, 2,
      3, 4
  };
  // clang-format on

  int dst[8];

  framework::DDim src_stride({2, 1});
  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({4, 1});
  platform::CPUDeviceContext ctx;
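
  // Concatenate src with itself along the column axis: each 2x2 block lands
  // in a 2x4 destination (row stride 4), the second copy shifted two columns
  // to the right (dst + 2).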
  StridedMemcpy<int>(ctx, src, src_stride, dst_dim, dst_stride, dst);
  StridedMemcpy<int>(ctx, src, src_stride, dst_dim, dst_stride, dst + 2);

  // clang-format off
  int expect_dst[] = {
      1, 2, 1, 2,
      3, 4, 3, 4
  };
  // clang-format on
  for (size_t i = 0; i < sizeof(expect_dst) / sizeof(int); ++i) {
    ASSERT_EQ(expect_dst[i], dst[i]);
  }
}

#ifndef PADDLE_ONLY_CPU
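// GPU variants of the same crop and concat checks; compiled out when Paddle
// is built CPU-only.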
TEST(StridedMemcpy, GPUCrop) {
  // clang-format off
  int src[] = {
      0, 1, 2, 0, 0,
      0, 3, 4, 0, 0,
      0, 0, 0, 0, 0,
  };
  // clang-format on

  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  framework::DDim src_stride({5, 1});

  int dst[4];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});

  platform::CUDADeviceContext ctx(gpu0);
  StridedMemcpy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride,
                     gpu_dst);
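
  // The device-to-host copy below is enqueued on the context's stream, so we
  // must synchronize before checking the result on the host.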
  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst), ctx.stream());
  ctx.Wait();

  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);

  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);
}

TEST(StridedMemcpy, GPUConcat) {
  // clang-format off
  int src[] = {
      1, 2,
      3, 4
  };
  // clang-format on

  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  int dst[8];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  framework::DDim src_stride({2, 1});
  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({4, 1});
  platform::CUDADeviceContext ctx(gpu0);
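
  // Same side-by-side concat as the CPU test, but both strided copies run on
  // the device before the result is copied back for verification.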
  StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride, gpu_dst);
  StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride,
                     gpu_dst + 2);

  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst), ctx.stream());
  ctx.Wait();

  // clang-format off
  int expect_dst[] = {
      1, 2, 1, 2,
      3, 4, 3, 4
  };
  // clang-format on
  for (size_t i = 0; i < sizeof(expect_dst) / sizeof(int); ++i) {
    ASSERT_EQ(expect_dst[i], dst[i]);
  }

  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);
}

#endif
}  // namespace operators
}  // namespace paddle

@@ -1,77 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/tensor_copy.h"
#include "gtest/gtest.h"
#include "paddle/memory/memory.h"

namespace paddle {
namespace operators {
TEST(TensorCopy, CPU_COPY) {
  int src[] = {
      0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
  };

  framework::DDim src_stride({5, 1});

  int dst[4];
  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});

  platform::CPUDeviceContext ctx;
  TensorCopy<int>(ctx, src + 1, src_stride, dst_dim, dst_stride, dst);

  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
}

#ifndef PADDLE_ONLY_CPU
TEST(TensorCopy, GPU_COPY) {
  int src[] = {
      0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
  };

  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  framework::DDim src_stride({5, 1});

  int dst[4];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});

  platform::CUDADeviceContext ctx(gpu0);
  TensorCopy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride, gpu_dst);

  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst));

  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);

  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);
}

#endif
}  // namespace operators
}  // namespace paddle