Renamed to strided_memcpy and prettify unittests

Add unittests for Crop and Concat
update-doc-pybind
Yu Yang 8 years ago committed by zchen0211
parent bda67d9d4b
commit 3fb0b6e67b

@ -96,4 +96,4 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(tensor_copy_test SRCS tensor_copy_test.cc DEPS tensor paddle_memory) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)

@ -22,10 +22,10 @@ namespace operators {
namespace detail { namespace detail {
template <typename T, int Rank> template <typename T, int Rank>
struct TensorCopyFunctor; struct StridedMemcpyFunctor;
template <typename T> template <typename T>
struct TensorCopyFunctor<T, 1> { struct StridedMemcpyFunctor<T, 1> {
void operator()(const platform::DeviceContext& dev_ctx, const T* src, void operator()(const platform::DeviceContext& dev_ctx, const T* src,
framework::Dim<1> src_stride, framework::Dim<1> dst_dim, framework::Dim<1> src_stride, framework::Dim<1> dst_dim,
framework::Dim<1> dst_stride, T* dst) const { framework::Dim<1> dst_stride, T* dst) const {
@ -48,12 +48,12 @@ struct TensorCopyFunctor<T, 1> {
}; };
template <typename T, int Rank> template <typename T, int Rank>
struct TensorCopyFunctor { struct StridedMemcpyFunctor {
void operator()(const platform::DeviceContext& dev_ctx, const T* src, void operator()(const platform::DeviceContext& dev_ctx, const T* src,
framework::Dim<Rank> src_stride, framework::Dim<Rank> dst_dim, framework::Dim<Rank> src_stride, framework::Dim<Rank> dst_dim,
framework::Dim<Rank> dst_stride, T* dst) const { framework::Dim<Rank> dst_stride, T* dst) const {
for (int64_t i = 0; i < dst_dim.head; ++i) { for (int64_t i = 0; i < dst_dim.head; ++i) {
TensorCopyFunctor<T, Rank - 1> func; StridedMemcpyFunctor<T, Rank - 1> func;
func(dev_ctx, src, src_stride.tail, dst_dim.tail, dst_stride.tail, dst); func(dev_ctx, src, src_stride.tail, dst_dim.tail, dst_stride.tail, dst);
src += src_stride.head; src += src_stride.head;
dst += dst_stride.head; dst += dst_stride.head;
@ -62,10 +62,10 @@ struct TensorCopyFunctor {
}; };
template <typename T> template <typename T>
struct TensorCopyDimVisitor : public boost::static_visitor<void> { struct StridedCopyDimVisitor : public boost::static_visitor<void> {
TensorCopyDimVisitor(const platform::DeviceContext& dev_ctx, const T* src, StridedCopyDimVisitor(const platform::DeviceContext& dev_ctx, const T* src,
const framework::DDim& src_stride, const framework::DDim& src_stride,
const framework::DDim& dst_stride, T* dst) const framework::DDim& dst_stride, T* dst)
: dev_ctx_(dev_ctx), : dev_ctx_(dev_ctx),
src_(src), src_(src),
src_stride_(src_stride), src_stride_(src_stride),
@ -77,7 +77,7 @@ struct TensorCopyDimVisitor : public boost::static_visitor<void> {
Dim src_stride = boost::get<Dim>(src_stride_); Dim src_stride = boost::get<Dim>(src_stride_);
Dim dst_stride = boost::get<Dim>(dst_stride_); Dim dst_stride = boost::get<Dim>(dst_stride_);
constexpr int dim = Dim::dimensions; constexpr int dim = Dim::dimensions;
TensorCopyFunctor<T, dim> functor; StridedMemcpyFunctor<T, dim> functor;
functor(dev_ctx_, src_, src_stride, dst_dim, dst_stride, dst_); functor(dev_ctx_, src_, src_stride, dst_dim, dst_stride, dst_);
} }

@ -13,15 +13,17 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/operators/detail/tensor_copy.h" #include "paddle/operators/detail/strided_memcpy.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
// Copy a tensor from src to dst. // Strided memory copy from src to dst.
// The src and dst should be both on dev_ctx.GetPlace()
// //
// the stride of an array (also referred to as increment, pitch or step size) is // The src and dst should be both on dev_ctx.GetPlace(), otherwise, there will
// be a segmentation fault.
//
// The stride of an array (also referred to as increment, pitch or step size) is
// the number of locations in memory between beginnings of successive array // the number of locations in memory between beginnings of successive array
// elements // elements
// //
@ -31,12 +33,12 @@ namespace operators {
// NOTE: When use GPU, the memcpy is async. To sync memcpy, please invoke // NOTE: When use GPU, the memcpy is async. To sync memcpy, please invoke
// `dev_ctx.Wait()`. // `dev_ctx.Wait()`.
template <typename T> template <typename T>
inline void TensorCopy(const platform::DeviceContext& dev_ctx, const T* src, inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
const framework::DDim& src_stride, const framework::DDim& src_stride,
const framework::DDim& dst_dim, const framework::DDim& dst_dim,
const framework::DDim& dst_stride, T* dst) { const framework::DDim& dst_stride, T* dst) {
using namespace detail; using namespace detail;
TensorCopyDimVisitor<T> func(dev_ctx, src, src_stride, dst_stride, dst); StridedCopyDimVisitor<T> func(dev_ctx, src, src_stride, dst_stride, dst);
boost::apply_visitor(func, dst_dim); boost::apply_visitor(func, dst_dim);
} }
} // namespace operators } // namespace operators

@ -0,0 +1,160 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/strided_memcpy.h"
#include "gtest/gtest.h"
#include "paddle/memory/memory.h"
namespace paddle {
namespace operators {
// Crops the 2x2 window {1, 2; 3, 4} out of a 3x5 row-major host buffer.
TEST(StridedMemcpy, CPUCrop) {
  // clang-format off
  int image[] = {
    0, 1, 2, 0, 0,
    0, 3, 4, 0, 0,
    0, 0, 0, 0, 0,
  };
  // clang-format on

  // Source rows are 5 elements apart; the destination is a dense 2x2 block.
  framework::DDim image_stride({5, 1});
  framework::DDim crop_dim({2, 2});
  framework::DDim crop_stride({2, 1});
  int crop[4];

  platform::CPUDeviceContext cpu_ctx;
  // Start one element into the buffer so the window begins at the value 1.
  StridedMemcpy<int>(cpu_ctx, image + 1, image_stride, crop_dim, crop_stride,
                     crop);

  const int expected[] = {1, 2, 3, 4};
  for (size_t i = 0; i < sizeof(expected) / sizeof(int); ++i) {
    ASSERT_EQ(expected[i], crop[i]);
  }
}
// Concatenates a 2x2 host block with itself along the column axis, producing
// a 2x4 result.
TEST(StridedMemcpy, CPUConcat) {
  // clang-format off
  int block[] = {
    1, 2,
    3, 4
  };
  // clang-format on

  int joined[8];

  // The source is a dense 2x2 block; destination rows are 4 elements apart.
  framework::DDim block_stride({2, 1});
  framework::DDim block_dim({2, 2});
  framework::DDim joined_stride({4, 1});

  platform::CPUDeviceContext cpu_ctx;
  // Write the block at column 0 and then again at column 2 of the output.
  const int column_offsets[] = {0, 2};
  for (size_t k = 0; k < sizeof(column_offsets) / sizeof(int); ++k) {
    StridedMemcpy<int>(cpu_ctx, block, block_stride, block_dim, joined_stride,
                       joined + column_offsets[k]);
  }

  // clang-format off
  int expected[] = {
    1, 2, 1, 2,
    3, 4, 3, 4
  };
  // clang-format on
  for (size_t i = 0; i < sizeof(expected) / sizeof(int); ++i) {
    ASSERT_EQ(expected[i], joined[i]);
  }
}
#ifndef PADDLE_ONLY_CPU
// Crops the 2x2 window {1, 2; 3, 4} out of a 3x5 row-major buffer that lives
// on GPU 0, then copies the result back to the host for verification.
TEST(StridedMemcpy, GPUCrop) {
  // clang-format off
  int src[] = {
    0, 1, 2, 0, 0,
    0, 3, 4, 0, 0,
    0, 0, 0, 0, 0,
  };
  // clang-format on
  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  // Stage the source buffer on the device.
  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  // Source rows are 5 elements apart; the destination is a dense 2x2 block.
  framework::DDim src_stride({5, 1});

  int dst[4];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});
  platform::CUDADeviceContext ctx(gpu0);

  // Start one element into the buffer so the window begins at the value 1.
  StridedMemcpy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride,
                     gpu_dst);

  // The device-to-host copy is issued on ctx's stream, so wait on ctx before
  // reading dst on the host.
  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst), ctx.stream());
  ctx.Wait();

  // Release the device buffers before asserting: ASSERT_* returns from the
  // test body on failure, which would otherwise skip these frees and leak.
  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);

  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
}
// Concatenates a 2x2 block with itself along the column axis on GPU 0,
// producing a 2x4 result that is copied back to the host for verification.
TEST(StridedMemcpy, GPUConcat) {
  // clang-format off
  int src[] = {
    1, 2,
    3, 4
  };
  // clang-format on
  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  // Stage the source block on the device.
  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  int dst[8];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  // The source is a dense 2x2 block; destination rows are 4 elements apart.
  framework::DDim src_stride({2, 1});
  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({4, 1});
  platform::CUDADeviceContext ctx(gpu0);

  // Write the block at column 0 and then again at column 2 of the output.
  StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride, gpu_dst);
  StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride,
                     gpu_dst + 2);

  // The device-to-host copy is issued on ctx's stream, so wait on ctx before
  // reading dst on the host.
  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst), ctx.stream());
  ctx.Wait();

  // Release the device buffers before asserting: ASSERT_* returns from the
  // test body on failure, which would otherwise skip these frees and leak.
  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);

  // clang-format off
  int expect_dst[] = {
    1, 2, 1, 2,
    3, 4, 3, 4
  };
  // clang-format on
  for (size_t i = 0; i < sizeof(expect_dst) / sizeof(int); ++i) {
    ASSERT_EQ(expect_dst[i], dst[i]);
  }
}
#endif
} // namespace operators
} // namespace paddle

@ -1,77 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/tensor_copy.h"
#include "gtest/gtest.h"
#include "paddle/memory/memory.h"
namespace paddle {
namespace operators {
// Copies the 2x2 window {1, 2; 3, 4} out of a host buffer whose rows are
// 5 elements apart.
TEST(TensorCopy, CPU_COPY) {
  int image[] = {
      0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
  };

  framework::DDim image_stride({5, 1});
  framework::DDim window_dim({2, 2});
  framework::DDim window_stride({2, 1});
  int window[4];

  platform::CPUDeviceContext cpu_ctx;
  // Start one element into the buffer so the window begins at the value 1.
  TensorCopy<int>(cpu_ctx, image + 1, image_stride, window_dim, window_stride,
                  window);

  const int expected[] = {1, 2, 3, 4};
  for (size_t i = 0; i < sizeof(expected) / sizeof(int); ++i) {
    ASSERT_EQ(expected[i], window[i]);
  }
}
#ifndef PADDLE_ONLY_CPU
// Copies the 2x2 window {1, 2; 3, 4} out of a buffer on GPU 0 whose rows are
// 5 elements apart, then copies the result back to the host for verification.
TEST(TensorCopy, GPU_COPY) {
  int src[] = {
      0, 1, 2, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0,
  };
  platform::GPUPlace gpu0(0);
  platform::CPUPlace cpu;

  // Stage the source buffer on the device.
  int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
  memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));

  framework::DDim src_stride({5, 1});

  int dst[4];
  int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));

  framework::DDim dst_dim({2, 2});
  framework::DDim dst_stride({2, 1});
  platform::CUDADeviceContext ctx(gpu0);

  // Start one element into the buffer so the window begins at the value 1.
  TensorCopy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride, gpu_dst);

  // The GPU copy is asynchronous: issue the device-to-host copy on ctx's
  // stream and wait on ctx before reading dst on the host.
  memory::Copy(cpu, dst, gpu0, gpu_dst, sizeof(dst), ctx.stream());
  ctx.Wait();

  // Release the device buffers before asserting: ASSERT_* returns from the
  // test body on failure, which would otherwise skip these frees and leak.
  memory::Free(gpu0, gpu_dst);
  memory::Free(gpu0, gpu_src);

  ASSERT_EQ(1, dst[0]);
  ASSERT_EQ(2, dst[1]);
  ASSERT_EQ(3, dst[2]);
  ASSERT_EQ(4, dst[3]);
}
#endif
} // namespace operators
} // namespace paddle
Loading…
Cancel
Save