@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <memory>
+#include <type_traits>
 #include <vector>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/variable.h"
@@ -27,9 +28,8 @@ namespace imperative {
 
 void TensorAdd(const framework::Variable& src, framework::Variable* dst);
 
-#if defined(PADDLE_WITH_CUDA)
-template <typename T>
-int TensorGPUAddTest(platform::CUDAPlace place, T t1, T t2) {
+template <typename Place, typename T>
+int TensorddTest(Place place, T t1, T t2) {
   framework::Variable var1;
   framework::Variable var2;
   std::vector<T> src_data(10, t1);
@@ -39,6 +39,7 @@ int TensorGPUAddTest(platform::CUDAPlace place, T t1, T t2) {
   for (unsigned int i = 0; i < 10; i++) {
     result.emplace_back(src_data[i] + dst_data[i]);
   }
+
   std::vector<int64_t> dims = {2, 5};
   auto* src = var1.GetMutable<framework::LoDTensor>();
   auto* dst = var2.GetMutable<framework::LoDTensor>();
@@ -46,44 +47,19 @@ int TensorGPUAddTest(platform::CUDAPlace place, T t1, T t2) {
   dst->Resize(framework::make_ddim(dims));
   auto* src_mutable = src->mutable_data<T>(place);
   auto* dst_mutable = dst->mutable_data<T>(place);
-  paddle::memory::Copy(place, src_mutable, src_place, src_data.data(),
-                       sizeof(T) * src_data.size(), 0);
-  paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(),
-                       sizeof(T) * dst_data.size(), 0);
-  imperative::TensorAdd(var1, &var2);
-  framework::LoDTensor rlt;
-  platform::CPUPlace rlt_place;
-  framework::TensorCopySync(*dst, rlt_place, &rlt);
-
-  for (unsigned int i = 0; i < rlt.numel(); i++) {
-    if (rlt.data<T>()[i] != result[i]) return 1;
-  }
-  return 0;
-}
+  if (!std::is_same<Place, platform::CUDAPlace>::value) {
+    paddle::memory::Copy(place, src_mutable, src_place, src_data.data(),
+                         sizeof(T) * src_data.size());
+    paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(),
+                         sizeof(T) * dst_data.size());
+#if defined(PADDLE_WITH_CUDA)
+  } else {
+    paddle::memory::Copy(place, src_mutable, src_place, src_data.data(),
+                         sizeof(T) * src_data.size(), 0);
+    paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(),
+                         sizeof(T) * dst_data.size(), 0);
+#endif
+  }
-#endif
-
-template <typename T>
-int TensorCPUAddTest(platform::CPUPlace place, T t1, T t2) {
-  framework::Variable var1;
-  framework::Variable var2;
-  std::vector<T> src_data(10, t1);
-  std::vector<T> dst_data(10, t2);
-  std::vector<T> result;
-  platform::CPUPlace src_place;
-  for (unsigned int i = 0; i < 10; i++) {
-    result.emplace_back(src_data[i] + dst_data[i]);
-  }
-  std::vector<int64_t> dims = {2, 5};
-  auto* src = var1.GetMutable<framework::LoDTensor>();
-  auto* dst = var2.GetMutable<framework::LoDTensor>();
-  src->Resize(framework::make_ddim(dims));
-  dst->Resize(framework::make_ddim(dims));
-  auto* src_mutable = src->mutable_data<T>(place);
-  auto* dst_mutable = dst->mutable_data<T>(place);
-  paddle::memory::Copy(place, src_mutable, src_place, src_data.data(),
-                       sizeof(T) * src_data.size());
-  paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(),
-                       sizeof(T) * dst_data.size());
   imperative::TensorAdd(var1, &var2);
   framework::LoDTensor rlt;
   platform::CPUPlace rlt_place;
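Note on the hunk above: the merged TensorddTest keeps one template body and
picks the copy path from the Place type. std::is_same<Place,
platform::CUDAPlace>::value is a compile-time constant, and the
`#if defined(PADDLE_WITH_CUDA)` guard wraps the `} else {` arm, so a CPU-only
build preprocesses the whole statement down to a plain if-block and never has
to compile the stream-taking memory::Copy overload. A minimal standalone
sketch of the same pattern, with stand-in place types rather than Paddle's:

    // One template body; the device-only arm exists only when the
    // macro is defined, exactly as in the hunk above.
    #include <iostream>
    #include <type_traits>

    struct CPUPlace {};
    struct CUDAPlace {};

    template <typename Place>
    void CopyForPlace(Place place) {
      if (!std::is_same<Place, CUDAPlace>::value) {
        std::cout << "host copy, no stream argument\n";  // CPU path
    #if defined(PADDLE_WITH_CUDA)
      } else {
        // Device path: stands in for the memory::Copy overload that
        // takes a trailing stream argument (0 in the test).
        std::cout << "device copy on stream 0\n";
    #endif
      }
    }

    int main() {
      CopyForPlace(CPUPlace{});
    #if defined(PADDLE_WITH_CUDA)
      CopyForPlace(CUDAPlace{});
    #endif
    }

Since C++11 has no `if constexpr`, both arms of the if are compiled for every
Place instantiation, which is why the preprocessor guard, and not the is_same
check alone, is what keeps the CUDA-only call out of CPU builds.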
@@ -92,6 +68,7 @@ int TensorCPUAddTest(platform::CPUPlace place, T t1, T t2) {
   for (unsigned int i = 0; i < rlt.numel(); i++) {
     if (rlt.data<T>()[i] != result[i]) return 1;
   }
+
   return 0;
 }
 
@@ -102,18 +79,38 @@ TEST(test_add_functor, add_functor) {
   platform::CPUPlace cpu_place;
 
   int cpu_res = 1;
-  cpu_res = TensorCPUAddTest(cpu_place, 1.0, 0.0);
+  cpu_res = TensorddTest(cpu_place, 1.0, 0.0);
   EXPECT_EQ(cpu_res, 0);
-  cpu_res = TensorCPUAddTest(cpu_place, static_cast<double>(1.0),
-                             static_cast<double>(2.0));
+  cpu_res = TensorddTest(cpu_place, static_cast<double>(1.0),
+                         static_cast<double>(2.0));
+  EXPECT_EQ(cpu_res, 0);
+  cpu_res = TensorddTest(cpu_place, static_cast<platform::float16>(1.0),
+                         static_cast<platform::float16>(2.0));
   EXPECT_EQ(cpu_res, 0);
 #if defined(PADDLE_WITH_CUDA)
   int gpu_res = 1;
-  gpu_res = TensorGPUAddTest(gpu_place, 1.0, 0.0);
+  gpu_res = TensorddTest(gpu_place, 1.0, 0.0);
   EXPECT_EQ(gpu_res, 0);
-  gpu_res = TensorGPUAddTest(gpu_place, static_cast<double>(1.0),
-                             static_cast<double>(2.0));
+  gpu_res = TensorddTest(gpu_place, static_cast<double>(1.0),
+                         static_cast<double>(2.0));
+  EXPECT_EQ(gpu_res, 0);
+  gpu_res = TensorddTest(gpu_place, static_cast<platform::float16>(1.0),
+                         static_cast<platform::float16>(2.0));
   EXPECT_EQ(gpu_res, 0);
 #endif
 }
+
+TEST(test_add_functor, execption) {
+  platform::CUDAPinnedPlace cuda_pinned_place;
+  platform::CUDAPlace cuda_place(0);
+  platform::CPUPlace cpu_place;
+
+  ASSERT_ANY_THROW(TensorddTest(cpu_place, 1, 0));
+#if defined(PADDLE_WITH_CUDA)
+  ASSERT_ANY_THROW(TensorddTest(cuda_pinned_place, 1.0, 0.0));
+  ASSERT_ANY_THROW(TensorddTest(cuda_pinned_place,
+                                static_cast<platform::float16>(1.0),
+                                static_cast<platform::float16>(2.0)));
+#endif
+}
 
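Note on the new execption test above: TensorAdd is expected to throw for
place/type combinations it does not handle (int data on CPUPlace, and copies
involving CUDAPinnedPlace), and ASSERT_ANY_THROW only requires that the
statement throws something, of any type. A self-contained gtest sketch of
that assertion, with a hypothetical AddChecked helper standing in for the
real TensorAdd:

    #include <stdexcept>
    #include "gtest/gtest.h"

    // Hypothetical stand-in: throws, like TensorAdd, when asked to
    // handle an unsupported combination.
    int AddChecked(bool supported) {
      if (!supported) {
        throw std::runtime_error("unsupported place/type combination");
      }
      return 0;
    }

    TEST(assert_any_throw_sketch, throws_on_unsupported) {
      ASSERT_ANY_THROW(AddChecked(/*supported=*/false));  // must throw
      ASSERT_NO_THROW(AddChecked(/*supported=*/true));    // must not throw
    }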