// Paddle/paddle/operators/math/math_function_test.cc
//
// Unit tests for paddle::operators::math::matmul and gemm.

#include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h"
#ifndef PADDLE_ONLY_CPU
// GPU matmul with only the second operand transposed.
//
// Both operands are the 2x3 matrix A = [[0, 1, 2], [3, 4, 5]], so the call
// is expected to produce A * A^T = [[5, 14], [14, 50]].
TEST(math_function, notrans_mul_trans) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input1_gpu;
  paddle::framework::Tensor input2_gpu;
  paddle::framework::Tensor out_gpu;
  paddle::framework::Tensor out;

  // Places are plain stack objects: nothing to delete, nothing to leak.
  paddle::platform::CPUPlace cpu_place;
  float* input1_ptr = input1.mutable_data<float>({2, 3}, cpu_place);
  float arr[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr, 6 * sizeof(float));

  paddle::platform::GPUPlace gpu_place(0);
  paddle::platform::CUDADeviceContext context(gpu_place);

  // The same host tensor is uploaded twice to serve as both operands.
  input1_gpu.CopyFrom<float>(input1, gpu_place);
  input2_gpu.CopyFrom<float>(input1, gpu_place);

  out_gpu.mutable_data<float>({2, 2}, gpu_place);

  // The trailing scalars (1 and 0) are presumably alpha and beta -- confirm
  // against the matmul declaration in math_function.h.
  paddle::operators::math::matmul<paddle::platform::GPUPlace, float>(
      context, input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0);

  // Bring the result back to the host before inspecting it.
  out.CopyFrom<float>(out_gpu, cpu_place);

  float* out_ptr = out.data<float>();
  EXPECT_EQ(out_ptr[0], 5);
  EXPECT_EQ(out_ptr[1], 14);
  EXPECT_EQ(out_ptr[2], 14);
  EXPECT_EQ(out_ptr[3], 50);
}
// GPU matmul with only the first operand transposed.
//
// Both operands are the 2x3 matrix A = [[0, 1, 2], [3, 4, 5]], so the call
// is expected to produce the 3x3 matrix
// A^T * A = [[9, 12, 15], [12, 17, 22], [15, 22, 29]].
TEST(math_function, trans_mul_notrans) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input1_gpu;
  paddle::framework::Tensor input2_gpu;
  paddle::framework::Tensor out_gpu;
  paddle::framework::Tensor out;

  // Places are plain stack objects: nothing to delete, nothing to leak.
  paddle::platform::CPUPlace cpu_place;
  float* input1_ptr = input1.mutable_data<float>({2, 3}, cpu_place);
  float arr[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr, 6 * sizeof(float));

  paddle::platform::GPUPlace gpu_place(0);
  paddle::platform::CUDADeviceContext context(gpu_place);

  // The same host tensor is uploaded twice to serve as both operands.
  input1_gpu.CopyFrom<float>(input1, gpu_place);
  input2_gpu.CopyFrom<float>(input1, gpu_place);

  out_gpu.mutable_data<float>({3, 3}, gpu_place);

  // The trailing scalars (1 and 0) are presumably alpha and beta -- confirm
  // against the matmul declaration in math_function.h.
  paddle::operators::math::matmul<paddle::platform::GPUPlace, float>(
      context, input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0);

  // Bring the result back to the host before inspecting it.
  out.CopyFrom<float>(out_gpu, cpu_place);

  float* out_ptr = out.data<float>();
  EXPECT_EQ(out_ptr[0], 9);
  EXPECT_EQ(out_ptr[1], 12);
  EXPECT_EQ(out_ptr[2], 15);
  EXPECT_EQ(out_ptr[3], 12);
  EXPECT_EQ(out_ptr[4], 17);
  EXPECT_EQ(out_ptr[5], 22);
  EXPECT_EQ(out_ptr[6], 15);
  EXPECT_EQ(out_ptr[7], 22);
  EXPECT_EQ(out_ptr[8], 29);
}
// GPU gemm on sub-matrices, with neither operand transposed.
//
// The B and C pointers are offset by one element while keeping a leading
// dimension of 4, so the call addresses columns 1..3 of the 3x4 and 2x4
// buffers. Column 0 of the output must therefore stay untouched.
TEST(math_function, gemm_notrans_cublas) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input2;
  paddle::framework::Tensor input3;
  paddle::framework::Tensor input1_gpu;
  paddle::framework::Tensor input2_gpu;
  paddle::framework::Tensor input3_gpu;

  int m = 2;
  int n = 3;
  int k = 3;

  // Places are plain stack objects: nothing to delete, nothing to leak.
  paddle::platform::CPUPlace cpu_place;

  float* input1_ptr = input1.mutable_data<float>({2, 3}, cpu_place);
  float arr1[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr1, 6 * sizeof(float));

  float* input2_ptr = input2.mutable_data<float>({3, 4}, cpu_place);
  float arr2[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  memcpy(input2_ptr, arr2, 12 * sizeof(float));

  float* input3_ptr = input3.mutable_data<float>({2, 4}, cpu_place);
  float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  memcpy(input3_ptr, arr3, 8 * sizeof(float));

  paddle::platform::GPUPlace gpu_place(0);
  paddle::platform::CUDADeviceContext context(gpu_place);

  input1_gpu.CopyFrom<float>(input1, gpu_place);
  input2_gpu.CopyFrom<float>(input2, gpu_place);
  input3_gpu.CopyFrom<float>(input3, gpu_place);

  float* a = input1_gpu.data<float>();
  float* b = input2_gpu.data<float>();
  float* c = input3_gpu.mutable_data<float>(gpu_place);

  // Argument order appears to follow the BLAS gemm convention
  // (transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc) --
  // confirm against math_function.h.
  paddle::operators::math::gemm<paddle::platform::GPUPlace, float>(
      context, false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4);

  input3.CopyFrom<float>(input3_gpu, cpu_place);
  // Re-read the host pointer after the copy instead of assuming CopyFrom
  // reused the buffer that was obtained before it.
  input3_ptr = input3.data<float>();

  // numpy code:
  // a = np.arange(6).reshape(2, 3)
  // b = np.arange(12).reshape(3, 4)[:, 1:]
  // c = np.arange(8).reshape(2, 4)[:, 1:]
  // out = np.arange(8).reshape(2, 4)
  // out[:, 1:] = np.dot(a, b) + c
  EXPECT_EQ(input3_ptr[0], 0);
  EXPECT_EQ(input3_ptr[1], 24);
  EXPECT_EQ(input3_ptr[2], 28);
  EXPECT_EQ(input3_ptr[3], 32);
  EXPECT_EQ(input3_ptr[4], 4);
  EXPECT_EQ(input3_ptr[5], 73);
  EXPECT_EQ(input3_ptr[6], 86);
  EXPECT_EQ(input3_ptr[7], 99);
}
// GPU gemm on sub-matrices, with the second operand transposed.
//
// input2 holds the 4x3 transpose of np.arange(12).reshape(3, 4). Offsetting
// its pointer by 3 skips the first row, so the transposed operand is
// [[1, 2, 3], [5, 6, 7], [9, 10, 11]]. The expected output therefore matches
// the non-transposed gemm test, and column 0 of the output stays untouched.
TEST(math_function, gemm_trans_cublas) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input2;
  paddle::framework::Tensor input3;
  paddle::framework::Tensor input1_gpu;
  paddle::framework::Tensor input2_gpu;
  paddle::framework::Tensor input3_gpu;

  int m = 2;
  int n = 3;
  int k = 3;

  // Places are plain stack objects: nothing to delete, nothing to leak.
  paddle::platform::CPUPlace cpu_place;

  float* input1_ptr = input1.mutable_data<float>({2, 3}, cpu_place);
  float arr1[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr1, 6 * sizeof(float));

  float* input2_ptr = input2.mutable_data<float>({4, 3}, cpu_place);
  float arr2[12] = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
  memcpy(input2_ptr, arr2, 12 * sizeof(float));

  float* input3_ptr = input3.mutable_data<float>({2, 4}, cpu_place);
  float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  memcpy(input3_ptr, arr3, 8 * sizeof(float));

  paddle::platform::GPUPlace gpu_place(0);
  paddle::platform::CUDADeviceContext context(gpu_place);

  input1_gpu.CopyFrom<float>(input1, gpu_place);
  input2_gpu.CopyFrom<float>(input2, gpu_place);
  input3_gpu.CopyFrom<float>(input3, gpu_place);

  float* a = input1_gpu.data<float>();
  float* b = input2_gpu.data<float>();
  float* c = input3_gpu.mutable_data<float>(gpu_place);

  // Argument order appears to follow the BLAS gemm convention
  // (transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc) --
  // confirm against math_function.h.
  paddle::operators::math::gemm<paddle::platform::GPUPlace, float>(
      context, false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4);

  input3.CopyFrom<float>(input3_gpu, cpu_place);
  // Re-read the host pointer after the copy instead of assuming CopyFrom
  // reused the buffer that was obtained before it.
  input3_ptr = input3.data<float>();

  EXPECT_EQ(input3_ptr[0], 0);
  EXPECT_EQ(input3_ptr[1], 24);
  EXPECT_EQ(input3_ptr[2], 28);
  EXPECT_EQ(input3_ptr[3], 32);
  EXPECT_EQ(input3_ptr[4], 4);
  EXPECT_EQ(input3_ptr[5], 73);
  EXPECT_EQ(input3_ptr[6], 86);
  EXPECT_EQ(input3_ptr[7], 99);
}
#endif
// CPU gemm on sub-matrices, with neither operand transposed.
//
// The B and C pointers are offset by one element while keeping a leading
// dimension of 4, so the call addresses columns 1..3 of the 3x4 and 2x4
// buffers. In numpy terms:
//   a = np.arange(6).reshape(2, 3)
//   b = np.arange(12).reshape(3, 4)[:, 1:]
//   c = np.arange(8).reshape(2, 4)[:, 1:]
//   out = np.arange(8).reshape(2, 4)
//   out[:, 1:] = np.dot(a, b) + c
TEST(math_function, gemm_notrans_cblas) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input2;
  paddle::framework::Tensor input3;

  int m = 2;
  int n = 3;
  int k = 3;

  // The place is a plain stack object: nothing to delete, nothing to leak.
  paddle::platform::CPUPlace cpu_place;

  float* input1_ptr = input1.mutable_data<float>({2, 3}, cpu_place);
  float arr1[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr1, 6 * sizeof(float));

  float* input2_ptr = input2.mutable_data<float>({3, 4}, cpu_place);
  float arr2[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  memcpy(input2_ptr, arr2, 12 * sizeof(float));

  float* input3_ptr = input3.mutable_data<float>({2, 4}, cpu_place);
  float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  memcpy(input3_ptr, arr3, 8 * sizeof(float));

  paddle::platform::CPUDeviceContext context(cpu_place);

  // Argument order appears to follow the BLAS gemm convention
  // (transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc) --
  // confirm against math_function.h.
  paddle::operators::math::gemm<paddle::platform::CPUPlace, float>(
      context, false, false, m, n, k, 1, input1_ptr, 3, input2_ptr + 1, 4, 1,
      input3_ptr + 1, 4);

  // Column 0 (indices 0 and 4) lies outside the addressed sub-matrix and
  // must keep its initial value.
  EXPECT_EQ(input3_ptr[0], 0);
  EXPECT_EQ(input3_ptr[1], 24);
  EXPECT_EQ(input3_ptr[2], 28);
  EXPECT_EQ(input3_ptr[3], 32);
  EXPECT_EQ(input3_ptr[4], 4);
  EXPECT_EQ(input3_ptr[5], 73);
  EXPECT_EQ(input3_ptr[6], 86);
  EXPECT_EQ(input3_ptr[7], 99);
}
// CPU gemm on sub-matrices, with the second operand transposed.
//
// input2 holds the 4x3 transpose of np.arange(12).reshape(3, 4). Offsetting
// its pointer by 3 skips the first row, so the transposed operand is
// [[1, 2, 3], [5, 6, 7], [9, 10, 11]] and the expected output matches the
// non-transposed gemm test.
//
// NOTE(review): the test name spells "clbas" (presumably "cblas"); it is
// left unchanged so existing test filters keep matching.
TEST(math_function, gemm_trans_clbas) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input2;
  paddle::framework::Tensor input3;

  int m = 2;
  int n = 3;
  int k = 3;

  // The place is a plain stack object: nothing to delete, nothing to leak.
  paddle::platform::CPUPlace cpu_place;

  float* input1_ptr = input1.mutable_data<float>({2, 3}, cpu_place);
  float arr1[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr1, 6 * sizeof(float));

  float* input2_ptr = input2.mutable_data<float>({4, 3}, cpu_place);
  float arr2[12] = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
  memcpy(input2_ptr, arr2, 12 * sizeof(float));

  float* input3_ptr = input3.mutable_data<float>({2, 4}, cpu_place);
  float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  memcpy(input3_ptr, arr3, 8 * sizeof(float));

  paddle::platform::CPUDeviceContext context(cpu_place);

  // Argument order appears to follow the BLAS gemm convention
  // (transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc) --
  // confirm against math_function.h.
  paddle::operators::math::gemm<paddle::platform::CPUPlace, float>(
      context, false, true, m, n, k, 1, input1_ptr, 3, input2_ptr + 3, 3, 1,
      input3_ptr + 1, 4);

  // Column 0 (indices 0 and 4) lies outside the addressed sub-matrix and
  // must keep its initial value.
  EXPECT_EQ(input3_ptr[0], 0);
  EXPECT_EQ(input3_ptr[1], 24);
  EXPECT_EQ(input3_ptr[2], 28);
  EXPECT_EQ(input3_ptr[3], 32);
  EXPECT_EQ(input3_ptr[4], 4);
  EXPECT_EQ(input3_ptr[5], 73);
  EXPECT_EQ(input3_ptr[6], 86);
  EXPECT_EQ(input3_ptr[7], 99);
}