|
|
|
@ -36,8 +36,9 @@ class Multiply {
|
|
|
|
|
|
|
|
|
|
TEST(Transform, CPUUnary) {
|
|
|
|
|
using namespace paddle::platform;
|
|
|
|
|
CPUDeviceContext ctx;
|
|
|
|
|
float buf[4] = {0.1, 0.2, 0.3, 0.4};
|
|
|
|
|
Transform(CPUPlace(), buf, buf + 4, buf, Scale<float>(10));
|
|
|
|
|
Transform(ctx, buf, buf + 4, buf, Scale<float>(10));
|
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
|
ASSERT_NEAR(buf[i], static_cast<float>(i + 1), 1e-5);
|
|
|
|
|
}
|
|
|
|
@ -47,10 +48,12 @@ TEST(Transform, GPUUnary) {
|
|
|
|
|
using namespace paddle::platform;
|
|
|
|
|
using namespace paddle::memory;
|
|
|
|
|
GPUPlace gpu0(0);
|
|
|
|
|
CUDADeviceContext ctx(gpu0);
|
|
|
|
|
float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4};
|
|
|
|
|
float* gpu_buf = static_cast<float*>(Alloc(gpu0, sizeof(float) * 4));
|
|
|
|
|
Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf));
|
|
|
|
|
Transform(gpu0, gpu_buf, gpu_buf + 4, gpu_buf, Scale<float>(10));
|
|
|
|
|
Transform(ctx, gpu_buf, gpu_buf + 4, gpu_buf, Scale<float>(10));
|
|
|
|
|
ctx.Wait();
|
|
|
|
|
Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf));
|
|
|
|
|
Free(gpu0, gpu_buf);
|
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
@ -62,7 +65,7 @@ TEST(Transform, CPUBinary) {
|
|
|
|
|
using namespace paddle::platform;
|
|
|
|
|
using namespace paddle::memory;
|
|
|
|
|
int buf[4] = {1, 2, 3, 4};
|
|
|
|
|
Transform(CPUPlace(), buf, buf + 4, buf, buf, Multiply<int>());
|
|
|
|
|
Transform(CPUDeviceContext(), buf, buf + 4, buf, buf, Multiply<int>());
|
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
|
ASSERT_EQ((i + 1) * (i + 1), buf[i]);
|
|
|
|
|
}
|
|
|
|
@ -73,9 +76,11 @@ TEST(Transform, GPUBinary) {
|
|
|
|
|
using namespace paddle::memory;
|
|
|
|
|
int buf[4] = {1, 2, 3, 4};
|
|
|
|
|
GPUPlace gpu0(0);
|
|
|
|
|
CUDADeviceContext ctx(gpu0);
|
|
|
|
|
int* gpu_buf = static_cast<int*>(Alloc(gpu0, sizeof(buf)));
|
|
|
|
|
Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf));
|
|
|
|
|
Transform(gpu0, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply<int>());
|
|
|
|
|
Transform(ctx, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply<int>());
|
|
|
|
|
ctx.Wait();
|
|
|
|
|
Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf));
|
|
|
|
|
Free(gpu0, gpu_buf);
|
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
|