/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/place.h"

DEFINE_double(acc, 1e-5, "Test accuracy threshold.");

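// Test helpers used throughout this file: RandomVec fills a buffer with
// uniform random values in [lower, upper), ExpectEQ compares two buffers
// element-wise (EXPECT_NEAR with FLAGS_acc for floating-point types,
// EXPECT_EQ otherwise), and TestSizes lists the problem sizes every kernel
// test sweeps over.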
template <typename T>
void RandomVec(const int n, T* a, const T lower = static_cast<T>(-2.f),
               const T upper = static_cast<T>(2.f)) {
  static unsigned int seed = 100;
  std::mt19937 rng(seed++);
  std::uniform_real_distribution<double> uniform_dist(0, 1);
  for (int i = 0; i < n; ++i) {
    a[i] = static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
  }
}

template <typename T>
void ExpectEQ(const T* target, const T* refer, size_t n) {
  if (std::is_floating_point<T>::value) {
    for (size_t i = 0; i < n; ++i) {
      EXPECT_NEAR(target[i], refer[i], FLAGS_acc) << " at index : " << i;
    }
  } else {
    for (size_t i = 0; i < n; ++i) {
      EXPECT_EQ(target[i], refer[i]) << " at index : " << i;
    }
  }
}

std::vector<int> TestSizes() {
  std::vector<int> s;
  for (int i = 1; i < 32; ++i) {
    s.push_back(i);
  }
  // test some large sizes
  s.push_back(100);
  s.push_back(1000);
  s.push_back(2000);
  return s;
}

namespace jit = paddle::operators::jit;
using CPUPlace = paddle::platform::CPUPlace;

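// Runs the given verifier on every candidate implementation (generated
// jitcode, vendor/intrinsic kernels, and the refer kernel) registered for
// KernelTuple on PlaceType with the given attribute.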
template <typename KernelTuple, typename PlaceType, typename Tester,
          typename... Args>
void TestAllImpls(const typename KernelTuple::attr_type& attr,
                  const Tester& verifier, const Args&... args) {
  auto funcs = jit::GetAllCandidateFuncsWithTypes<KernelTuple, PlaceType>(attr);
  for (auto f : funcs) {
    VLOG(10) << "Test Kernel " << f.first;
    verifier(f.second, args...);
  }
}

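// Usage sketch (not part of the tests): outside of this file a kernel is
// typically fetched once from the cache and then called like a plain
// function, mirroring how the verifiers below invoke `tgt`, e.g.
//   auto vadd = jit::KernelFuncs<jit::VAddTuple<float>, CPUPlace>::Cache().At(d);
//   vadd(x_data, y_data, z_data, d);
// where x_data/y_data/z_data/d are caller-provided buffers and the size.

// The next four tests cover the element-wise kernel families, named after
// their signatures: XYZN is z = f(x, y) over n elements, AXYN is y = f(a, x)
// with a scalar a, XYN is y = f(x), and XRN reduces x to a single scalar.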
template <typename KernelTuple, typename PlaceType>
void TestKernelXYZN() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  for (int d : TestSizes()) {
    auto ref = jit::GetReferFunc<KernelTuple>();
    EXPECT_TRUE(ref != nullptr);

    std::vector<T> x(d), y(d), zref(d);
    RandomVec<T>(d, x.data());
    RandomVec<T>(d, y.data());

    std::vector<T> xinp(d), yinp(d);  // inplace test
    std::copy(x.begin(), x.end(), xinp.begin());
    std::copy(y.begin(), y.end(), yinp.begin());

    const T* x_data = x.data();
    const T* y_data = y.data();
    T* zref_data = zref.data();
    T* xinp_data = xinp.data();
    T* yinp_data = yinp.data();

    // test refer code inplace
    ref(x_data, y_data, zref_data, d);
    ref(x_data, yinp_data, yinp_data, d);
    ref(xinp_data, y_data, xinp_data, d);
    ExpectEQ<T>(xinp_data, zref_data, d);
    ExpectEQ<T>(yinp_data, zref_data, d);

    auto verifier = [](const typename KernelTuple::func_type tgt,
                       const std::vector<T>& x, const std::vector<T>& y,
                       const std::vector<T>& zref) {
      EXPECT_TRUE(tgt != nullptr);
      EXPECT_EQ(zref.size(), x.size());
      EXPECT_EQ(zref.size(), y.size());
      const T* x_data = x.data();
      const T* y_data = y.data();
      const T* zref_data = zref.data();
      const int d = zref.size();

      std::vector<T> ztgt(d);
      T* ztgt_data = ztgt.data();
      // test normal
      tgt(x_data, y_data, ztgt_data, d);
      ExpectEQ<T>(ztgt_data, zref_data, d);
      // test inplace x
      std::copy(x.begin(), x.end(), ztgt.begin());
      tgt(ztgt_data, y_data, ztgt_data, d);
      ExpectEQ<T>(ztgt_data, zref_data, d);
      // test inplace y
      std::copy(y.begin(), y.end(), ztgt.begin());
      tgt(x_data, ztgt_data, ztgt_data, d);
      ExpectEQ<T>(ztgt_data, zref_data, d);
    };

    TestAllImpls<KernelTuple, PlaceType>(d, verifier, x, y, zref);
  }
}

template <typename KernelTuple, typename PlaceType>
void TestKernelAXYN() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  for (int d : TestSizes()) {
    auto ref = jit::GetReferFunc<KernelTuple>();
    EXPECT_TRUE(ref != nullptr);

    const T a = static_cast<T>(3);
    std::vector<T> x(d), yref(d);
    std::vector<T> xinp(d);  // inplace test
    RandomVec<T>(d, x.data());
    std::copy(x.begin(), x.end(), xinp.begin());

    const T* x_data = x.data();
    T* yref_data = yref.data();
    T* xinp_data = xinp.data();
    // test refer code inplace
    ref(&a, x_data, yref_data, d);
    ref(&a, xinp_data, xinp_data, d);
    ExpectEQ<T>(xinp_data, yref_data, d);

    auto verifier = [](const typename KernelTuple::func_type tgt, const T a,
                       const std::vector<T>& x, const std::vector<T>& yref) {
      EXPECT_TRUE(tgt != nullptr);
      EXPECT_EQ(yref.size(), x.size());
      const T* x_data = x.data();
      const T* yref_data = yref.data();
      const int d = yref.size();
      std::vector<T> ytgt(d);
      T* ytgt_data = ytgt.data();
      // test normal
      tgt(&a, x_data, ytgt_data, d);
      ExpectEQ<T>(ytgt_data, yref_data, d);
      // test inplace x
      std::copy(x.begin(), x.end(), ytgt.begin());
      tgt(&a, ytgt_data, ytgt_data, d);
      ExpectEQ<T>(ytgt_data, yref_data, d);
    };
    TestAllImpls<KernelTuple, PlaceType>(d, verifier, a, x, yref);
  }
}

template <typename KernelTuple, typename PlaceType>
void TestKernelXYN() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  for (int d : TestSizes()) {
    auto ref = jit::GetReferFunc<KernelTuple>();
    EXPECT_TRUE(ref != nullptr);

    std::vector<T> x(d), yref(d);
    std::vector<T> xinp(d);  // inplace test
    RandomVec<T>(d, x.data());
    std::copy(x.begin(), x.end(), xinp.begin());

    const T* x_data = x.data();
    T* yref_data = yref.data();
    T* xinp_data = xinp.data();
    // test refer code inplace
    ref(x_data, yref_data, d);
    ref(xinp_data, xinp_data, d);
    ExpectEQ<T>(xinp_data, yref_data, d);
    auto verifier = [](const typename KernelTuple::func_type tgt,
                       const std::vector<T>& x, const std::vector<T>& yref) {
      EXPECT_TRUE(tgt != nullptr);
      EXPECT_EQ(yref.size(), x.size());
      const T* x_data = x.data();
      const T* yref_data = yref.data();
      const int d = yref.size();
      std::vector<T> ytgt(d);
      T* ytgt_data = ytgt.data();
      // test normal
      tgt(x_data, ytgt_data, d);
      ExpectEQ<T>(ytgt_data, yref_data, d);
      // test inplace x
      std::copy(x.begin(), x.end(), ytgt.begin());
      tgt(ytgt_data, ytgt_data, d);
      ExpectEQ<T>(ytgt_data, yref_data, d);
    };
    TestAllImpls<KernelTuple, PlaceType>(d, verifier, x, yref);
  }
}

template <typename KernelTuple, typename PlaceType>
void TestKernelXRN() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  auto last_acc = FLAGS_acc;
  FLAGS_acc = 1e-4;
  for (int d : TestSizes()) {
    auto ref = jit::GetReferFunc<KernelTuple>();
    EXPECT_TRUE(ref != nullptr);
    std::vector<T> x(d);
    RandomVec<T>(d, x.data());
    T ref_res;
    ref(x.data(), &ref_res, d);

    auto verifier = [](const typename KernelTuple::func_type tgt,
                       const std::vector<T>& x, const T ref_res) {
      EXPECT_TRUE(tgt != nullptr);
      T tgt_res;
      tgt(x.data(), &tgt_res, x.size());
      ExpectEQ<T>(&tgt_res, &ref_res, 1);
    };
    TestAllImpls<KernelTuple, PlaceType>(d, verifier, x, ref_res);
  }
  FLAGS_acc = last_acc;
}

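// TestKernelLSTM exercises a single LSTM step: for every size, activation
// combination, and peephole setting it builds a jit::lstm_attr_t, runs the
// refer kernel to produce reference ct/ht, and checks every candidate
// implementation against those references.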
template <typename KernelTuple, typename PlaceType>
void TestKernelLSTM() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
  auto test_sizes = TestSizes();
  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
  for (int d : test_sizes) {
    for (bool use_peephole : {true, false}) {
      for (auto& act_gate : all_acts) {
        for (auto& act_cand : all_acts) {
          for (auto& act_cell : all_acts) {
            const jit::lstm_attr_t attr(
                d, jit::to_kerneltype(act_gate), jit::to_kerneltype(act_cand),
                jit::to_kerneltype(act_cell), use_peephole);
            auto ref = jit::GetReferFunc<KernelTuple>();
            EXPECT_TRUE(ref != nullptr);
            std::vector<T> xsrc(4 * d), wp(3 * d), ct_1(d);
            std::vector<T> ct_ref(d), ht_ref(d), checked(2 * d);
            RandomVec<T>(4 * d, xsrc.data());
            RandomVec<T>(3 * d, wp.data(), -1.f, 1.f);
            RandomVec<T>(d, ct_1.data(), -1.f, 1.f);
            // x may be changed during compute, so keep a copy of the source
            std::vector<T> x(xsrc.size());
            std::copy(xsrc.begin(), xsrc.end(), x.begin());
            const T* ct_1_data = ct_1.data();
            const T* wp_data = wp.data();
            T* x_data = x.data();
            T* checked_data = checked.data();
            T* ct_ref_data = ct_ref.data();
            T* ht_ref_data = ht_ref.data();
            jit::lstm_t step;
            step.gates = x_data;
            step.ct_1 = ct_1_data;
            step.ct = ct_ref_data;
            step.ht = ht_ref_data;
            if (use_peephole) {
              step.wp = wp_data;
              step.checked = checked_data;
            }
            ref(&step, &attr);
            VLOG(10) << attr;

            auto verifier = [](
                const typename KernelTuple::func_type tgt,
                const std::vector<T>& xsrc, const std::vector<T>& wp,
                const std::vector<T>& ct_1, const std::vector<T>& ct_ref,
                const std::vector<T>& ht_ref,
                const typename KernelTuple::attr_type& attr) {
              EXPECT_TRUE(tgt != nullptr);
              EXPECT_EQ(ct_ref.size(), ht_ref.size());
              EXPECT_EQ(ct_1.size(), ht_ref.size());
              EXPECT_EQ(xsrc.size(), 4 * ht_ref.size());
              EXPECT_EQ(wp.size(), 3 * ht_ref.size());

              // x may be changed during compute, so keep a copy of the source
              int d = ht_ref.size();
              std::vector<T> x(xsrc.size()), ct(ct_ref.size()),
                  ht(ht_ref.size());
              std::vector<T> checked(2 * d);
              std::copy(xsrc.begin(), xsrc.end(), x.begin());

              const T* ct_1_data = ct_1.data();
              const T* wp_data = wp.data();
              const T* ct_ref_data = ct_ref.data();
              const T* ht_ref_data = ht_ref.data();
              T* x_data = x.data();
              T* ct_data = ct.data();
              T* ht_data = ht.data();
              T* checked_data = checked.data();

              jit::lstm_t step;
              step.gates = x_data;
              step.ct_1 = ct_1_data;
              step.ct = ct_data;
              step.ht = ht_data;
              if (attr.use_peephole) {
                step.wp = wp_data;
                step.checked = checked_data;
              }

              tgt(&step, &attr);
              ExpectEQ<T>(ct_data, ct_ref_data, d);
              ExpectEQ<T>(ht_data, ht_ref_data, d);
            };
            TestAllImpls<KernelTuple, PlaceType>(attr, verifier, xsrc, wp,
                                                 ct_1, ct_ref, ht_ref, attr);
          }
        }
      }
    }
  }
}

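// TestKernelGRU does the same for a single GRU step, sweeping sizes and the
// gate/candidate activations and comparing ht against the refer kernel.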
template <typename KernelTuple, typename PlaceType>
void TestKernelGRU() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
  auto test_sizes = TestSizes();
  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
  for (int d : test_sizes) {
    for (auto& act_gate : all_acts) {
      for (auto& act_cand : all_acts) {
        const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate),
                                   jit::to_kerneltype(act_cand));
        auto ref = jit::GetReferFunc<KernelTuple>();
        EXPECT_TRUE(ref != nullptr);
        std::vector<T> xsrc(3 * d), ht_1(d), ht_ref(d);
        RandomVec<T>(3 * d, xsrc.data());
        RandomVec<T>(d, ht_1.data());
        // x may be changed during compute, so keep a copy of the source
        std::vector<T> x(xsrc.size());
        std::copy(xsrc.begin(), xsrc.end(), x.begin());
        const T* ht_1_data = ht_1.data();
        T* x_data = x.data();
        T* ht_ref_data = ht_ref.data();
        jit::gru_t step;
        step.gates = x_data;
        step.ht_1 = ht_1_data;
        step.ht = ht_ref_data;
        ref(&step, &attr);
        VLOG(10) << attr;
        auto verifier = [](const typename KernelTuple::func_type tgt,
                           const std::vector<T>& xsrc,
                           const std::vector<T>& ht_1,
                           const std::vector<T>& ht_ref,
                           const typename KernelTuple::attr_type& attr) {
          EXPECT_TRUE(tgt != nullptr);
          EXPECT_EQ(ht_1.size(), ht_ref.size());
          EXPECT_EQ(xsrc.size(), 3 * ht_ref.size());

          // x may be changed during compute, so keep a copy of the source
          int d = ht_ref.size();
          std::vector<T> x(xsrc.size()), ht(ht_ref.size());
          std::copy(xsrc.begin(), xsrc.end(), x.begin());
          const T* ht_1_data = ht_1.data();
          const T* ht_ref_data = ht_ref.data();
          T* x_data = x.data();
          T* ht_data = ht.data();
          jit::gru_t step;
          step.gates = x_data;
          step.ht_1 = ht_1_data;
          step.ht = ht_data;
          tgt(&step, &attr);
          ExpectEQ<T>(ht_data, ht_ref_data, d);
        };
        TestAllImpls<KernelTuple, PlaceType>(attr, verifier, xsrc, ht_1,
                                             ht_ref, attr);
      }
    }
  }
}

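// TestKernelNCHW16CMulNC multiplies an NCHW16C-laid-out tensor by a
// per-channel vector. The jitcode implementation is only expected to exist
// for float with AVX512F support (ZMM_FLOAT_BLOCK lanes per channel block).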
template <typename KernelTuple, typename PlaceType>
void TestKernelNCHW16CMulNC() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  const int n = 3, c = 16 * 4, h = 10, w = 10;
  auto ref = jit::GetReferFunc<KernelTuple>();
  EXPECT_TRUE(ref != nullptr);
  int sz = n * c * h * w;
  std::vector<T> x(sz), y(n * c), zref(sz);
  std::vector<T> ztgt(sz), zjit(sz);
  RandomVec<T>(sz, x.data());
  RandomVec<T>(n * c, y.data());

  const T* x_data = x.data();
  const T* y_data = y.data();
  T* zref_data = zref.data();
  T* ztgt_data = ztgt.data();
  T* zjit_data = zjit.data();
  constexpr int simd_width = ZMM_FLOAT_BLOCK;
  int C = c / simd_width;
  auto tgt = jit::KernelFuncs<KernelTuple, PlaceType>::Cache().At(0);
  auto funcs = jit::GetAllCandidateFuncs<KernelTuple, PlaceType>(0);
  EXPECT_GT(funcs.size(), 0UL);
  auto jitcode = funcs[0];
  EXPECT_TRUE(tgt != nullptr);

  if (std::is_same<T, float>::value &&
      paddle::platform::MayIUse(paddle::platform::avx512f)) {
    EXPECT_TRUE(jitcode != nullptr);
  }
  for (int ni = 0; ni < n; ni++) {
    for (int ci = 0; ci < C; ci++) {
      auto ptr_x =
          x_data + ni * C * h * w * simd_width + ci * h * w * simd_width;
      auto ptr_y = y_data + ni * C * simd_width + ci * simd_width;
      auto ptr_zref =
          zref_data + ni * C * h * w * simd_width + ci * h * w * simd_width;
      auto ptr_ztgt =
          ztgt_data + ni * C * h * w * simd_width + ci * h * w * simd_width;

      ref(ptr_x, ptr_y, ptr_zref, h, w);
      tgt(ptr_x, ptr_y, ptr_ztgt, h, w);

      if (jitcode) {
        auto ptr_zjit =
            zjit_data + ni * C * h * w * simd_width + ci * h * w * simd_width;
        jitcode(ptr_x, ptr_y, ptr_zjit, h, w);
      }
    }
  }
  ExpectEQ<T>(ztgt_data, zref_data, sz);
  if (jitcode) {
    ExpectEQ<T>(zjit_data, zref_data, sz);
  }
}

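// TestKernelLayerNorm normalizes a (left x right) matrix row-wise. The
// kernel writes mean/var and may modify x, so the verifier works on copies
// of every input before comparing against the refer output.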
template <typename KernelTuple, typename PlaceType>
void TestKernelLayerNorm() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  const T epsilon = 9.99999975e-06;
  for (int n : {1, 2, 10}) {
    for (int x_dim_0 : {1, 9, 17, 50}) {
      int left = n * x_dim_0;
      for (int x_dim_1 : TestSizes()) {
        int right = x_dim_1;
        auto ref = jit::GetReferFunc<KernelTuple>();
        EXPECT_TRUE(ref != nullptr);
        int sz = left * right;
        std::vector<T> x(sz), mean(left), var(left), scale(right), bias(right),
            outref(sz);
        RandomVec<T>(sz, x.data());
        RandomVec<T>(left, mean.data());
        RandomVec<T>(left, var.data());
        RandomVec<T>(right, scale.data());
        RandomVec<T>(right, bias.data());

        const T* scale_data = scale.data();
        const T* bias_data = bias.data();
        T* x_data = x.data();
        T* mean_data = mean.data();
        T* var_data = var.data();
        T* outref_data = outref.data();

        ref(x_data, outref_data, mean_data, var_data, scale_data, bias_data,
            left, epsilon, right);

        auto verifier = [](
            const typename KernelTuple::func_type tgt, const std::vector<T>& x_,
            const std::vector<T>& outref_, const std::vector<T>& mean_,
            const std::vector<T>& var_, const std::vector<T>& scale,
            const std::vector<T>& bias, const int& left, const float& epsilon,
            const typename KernelTuple::attr_type& right) {
          EXPECT_TRUE(tgt != nullptr);
          std::vector<T> outtgt(outref_.size());
          std::vector<T> x(x_.size());
          std::vector<T> mean(mean_.size());
          std::vector<T> var(var_.size());
          std::vector<T> outref(outref_.size());
          std::copy(x_.begin(), x_.end(), x.begin());
          std::copy(mean_.begin(), mean_.end(), mean.begin());
          std::copy(var_.begin(), var_.end(), var.begin());
          std::copy(outref_.begin(), outref_.end(), outref.begin());

          EXPECT_EQ(x.size(), static_cast<size_t>(left * right));
          EXPECT_EQ(outref.size(), static_cast<size_t>(left * right));
          EXPECT_EQ(mean.size(), static_cast<size_t>(left));
          EXPECT_EQ(var.size(), static_cast<size_t>(left));
          EXPECT_EQ(scale.size(), static_cast<size_t>(right));
          EXPECT_EQ(bias.size(), static_cast<size_t>(right));

          const T* scale_data = scale.data();
          const T* bias_data = bias.data();
          T* x_data = x.data();
          T* mean_data = mean.data();
          T* var_data = var.data();
          T* outref_data = outref.data();
          T* outtgt_data = outtgt.data();
          tgt(x_data, outtgt_data, mean_data, var_data, scale_data, bias_data,
              left, epsilon, right);
          ExpectEQ<T>(outtgt_data, outref_data, left * right);
        };
        TestAllImpls<KernelTuple, PlaceType>(right, verifier, x, outref, mean,
                                             var, scale, bias, left, epsilon,
                                             right);
      }
    }
  }
}

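// TestKernelCRFDecoding runs CRF decoding over seq_len x tag_num scores; w
// holds (tag_num + state_trans_base_idx) x tag_num transition weights, and
// both the alpha scores and the tracking indices are checked against the
// refer kernel.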
template <typename KernelTuple, typename PlaceType>
void TestKernelCRFDecoding() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  constexpr int state_trans_base_idx = 2;
  auto test_sizes = TestSizes();
  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 2000));
  for (int seq_len : {1, 11, 17, 50}) {
    for (int tag_num : test_sizes) {
      auto ref = jit::GetReferFunc<KernelTuple>();
      EXPECT_TRUE(ref != nullptr);
      int x_sz = seq_len * tag_num;
      int w_sz = (tag_num + state_trans_base_idx) * tag_num;
      std::vector<T> x(x_sz), w(w_sz), alpharef(x_sz);
      std::vector<int> trackref(x_sz);
      RandomVec<T>(x_sz, x.data());
      RandomVec<T>(w_sz, w.data());

      ref(seq_len, (const T*)x.data(), (const T*)w.data(), alpharef.data(),
          trackref.data(), tag_num);

      auto verifier = [](
          const typename KernelTuple::func_type tgt, const int& seq_len,
          const std::vector<T>& x, const std::vector<T>& w,
          const std::vector<T>& alpharef, const std::vector<int>& trackref,
          const typename KernelTuple::attr_type& tag_num) {
        constexpr int state_trans_base_idx = 2;
        EXPECT_TRUE(tgt != nullptr);
        EXPECT_EQ(x.size(), static_cast<size_t>(seq_len * tag_num));
        EXPECT_EQ(w.size(), static_cast<size_t>(
                                (tag_num + state_trans_base_idx) * tag_num));
        EXPECT_EQ(alpharef.size(), static_cast<size_t>(seq_len * tag_num));
        EXPECT_EQ(trackref.size(), static_cast<size_t>(seq_len * tag_num));
        std::vector<T> alphatgt(alpharef.size());
        std::vector<int> tracktgt(trackref.size());
        memcpy(tracktgt.data(), trackref.data(), tag_num * sizeof(int));
        tgt(seq_len, (const T*)x.data(), (const T*)w.data(), alphatgt.data(),
            tracktgt.data(), tag_num);
        ExpectEQ<T>(alpharef.data(), alphatgt.data(), seq_len * tag_num);
        ExpectEQ<int>(trackref.data(), tracktgt.data(), seq_len * tag_num);
      };
      TestAllImpls<KernelTuple, PlaceType>(tag_num, verifier, seq_len, x, w,
                                           alpharef, trackref, tag_num);
    }
  }
}

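// TestKernelSeqPool and TestKernelEmbSeqPool cover sequence pooling: the
// former pools an h x w input down to a width-w row for sum/avg/sqrt, the
// latter pools embedding-table lookups and currently only exercises kSum
// (see the pool_types comment below).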
template <typename KernelTuple, typename PlaceType>
void TestKernelSeqPool() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  std::vector<jit::SeqPoolType> pool_types = {
      jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt};
  auto test_sizes = TestSizes();
  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
  for (auto type : pool_types) {
    for (int w : test_sizes) {
      jit::seq_pool_attr_t attr(w, type);
      for (int h : test_sizes) {
        attr.h = h;
        auto ref = jit::GetReferFunc<KernelTuple>();
        EXPECT_TRUE(ref != nullptr);
        std::vector<T> x(h * w), yref(w);
        RandomVec<T>(h * w, x.data());
        const T* x_data = x.data();
        T* yref_data = yref.data();
        ref(x_data, yref_data, &attr);
        VLOG(10) << attr;
        auto verifier = [](const typename KernelTuple::func_type tgt,
                           const std::vector<T>& x, const std::vector<T>& yref,
                           const typename KernelTuple::attr_type& attr) {
          EXPECT_TRUE(tgt != nullptr);
          EXPECT_EQ(x.size() % yref.size(), static_cast<size_t>(0));
          int w = yref.size();
          std::vector<T> y(w);
          const T* x_data = x.data();
          const T* yref_data = yref.data();
          T* y_data = y.data();
          tgt(x_data, y_data, &attr);
          ExpectEQ<T>(y_data, yref_data, w);
        };
        TestAllImpls<KernelTuple, PlaceType>(attr, verifier, x, yref, attr);
      }
    }
  }
}

template <typename KernelTuple, typename PlaceType>
void TestKernelEmbSeqPool() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  int64_t tbl_h = 1e4;
  std::vector<jit::SeqPoolType> pool_types = {
      jit::SeqPoolType::kSum};  // only kSum is supported so far
  auto test_sizes = TestSizes();
  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
  for (int tbl_w : test_sizes) {
    std::vector<T> table(tbl_h * tbl_w);
    RandomVec<T>(tbl_h * tbl_w, table.data());
    const T* table_data = table.data();
    for (auto type : pool_types) {
      for (int idx_w : {1, 2, 10, 16}) {
        for (int idx_h : {1, 2, 9, 13, 16}) {
          auto ref = jit::GetReferFunc<KernelTuple>();
          EXPECT_TRUE(ref != nullptr);
          std::vector<int64_t> idx(idx_h * idx_w);
          RandomVec<int64_t>(idx_h * idx_w, idx.data(), 0, tbl_h - 1);
          int64_t out_w = tbl_w * idx_w;
          std::vector<T> oref(out_w);
          const int64_t* idx_data = idx.data();
          T* o_data = oref.data();
          jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w,
                                        type);
          ref(table_data, idx_data, o_data, &attr);

          auto verifier = [](const typename KernelTuple::func_type tgt,
                             const std::vector<T>& table,
                             const std::vector<int64_t>& idx,
                             const std::vector<T>& oref,
                             const typename KernelTuple::attr_type& attr) {
            EXPECT_TRUE(tgt != nullptr);
            EXPECT_EQ(table.size(), static_cast<size_t>(attr.table_height *
                                                        attr.table_width));
            EXPECT_EQ(idx.size(), static_cast<size_t>(attr.index_height *
                                                      attr.index_width));
            EXPECT_EQ(oref.size(),
                      static_cast<size_t>(attr.table_width * attr.index_width));
            const T* table_data = table.data();
            const int64_t* idx_data = idx.data();
            const T* oref_data = oref.data();
            int o_w = oref.size();
            std::vector<T> out(o_w);
            T* o_data = out.data();
            tgt(table_data, idx_data, o_data, &attr);
            ExpectEQ<T>(o_data, oref_data, o_w);
          };
          TestAllImpls<KernelTuple, PlaceType>(attr, verifier, table, idx,
                                               oref, attr);
        }
      }
    }
  }
}

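// TestKernelMatMul compares small GEMMs (m, n in 1..4, k over TestSizes)
// against the refer kernel with a relaxed tolerance, since MKL may pick a
// different instruction path (see the MKL_CBWR note below).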
template <typename KernelTuple, typename PlaceType>
void TestKernelMatMul() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  auto last_acc = FLAGS_acc;
  // export MKL_CBWR=AVX forces MKL to use AVX
  // export KMP_DETERMINISTIC_REDUCTION=yes makes the result deterministic
  FLAGS_acc = 1e-3;
  for (int m : {1, 2, 3, 4}) {
    for (int n : {1, 2, 3, 4}) {
      for (int k : TestSizes()) {
        auto ref = jit::GetReferFunc<KernelTuple>();
        EXPECT_TRUE(ref != nullptr);
        std::vector<T> a(m * k), b(k * n), c(m * n);
        RandomVec<T>(m * k, a.data());
        RandomVec<T>(k * n, b.data());
        const T* a_data = a.data();
        const T* b_data = b.data();
        T* c_data = c.data();
        const jit::matmul_attr_t attr{m, n, k};
        ref(a_data, b_data, c_data, &attr);
        auto verifier = [](const typename KernelTuple::func_type tgt,
                           const std::vector<T>& a, const std::vector<T>& b,
                           const std::vector<T>& cref,
                           const typename KernelTuple::attr_type& attr) {
          EXPECT_TRUE(tgt != nullptr);
          EXPECT_EQ(a.size(), static_cast<size_t>(attr.m * attr.k));
          EXPECT_EQ(b.size(), static_cast<size_t>(attr.k * attr.n));
          EXPECT_EQ(cref.size(), static_cast<size_t>(attr.m * attr.n));
          std::vector<T> c(cref.size());
          const T* a_data = a.data();
          const T* b_data = b.data();
          const T* cref_data = cref.data();
          T* c_data = c.data();
          tgt(a_data, b_data, c_data, &attr);
          ExpectEQ<T>(c_data, cref_data, attr.m * attr.n);
        };
        TestAllImpls<KernelTuple, PlaceType>(attr, verifier, a, b, c, attr);
      }
    }
  }
  FLAGS_acc = last_acc;
}

template <typename KernelTuple, typename PlaceType>
void TestKernelSoftmax() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  for (int bs : {1, 2, 10}) {
    for (int n : TestSizes()) {
      for (int m : {1, 2, 3}) {  // remain
        if (m > n || n % m != 0) {
          continue;
        }
        auto ref = jit::GetReferFunc<KernelTuple>();
        EXPECT_TRUE(ref != nullptr);
        std::vector<T> x(bs * n), y(bs * n);
        RandomVec<T>(bs * n, x.data());
        const T* x_data = x.data();
        T* y_data = y.data();

        std::vector<T> xinp(x.size());  // inplace test
        std::copy(x.begin(), x.end(), xinp.begin());
        ref(x_data, y_data, n, bs, m);
        T* xinp_data = xinp.data();
        ref(xinp_data, xinp_data, n, bs, m);
        ExpectEQ<T>(xinp_data, y_data, n * bs);

        auto verifier = [](const typename KernelTuple::func_type tgt,
                           const std::vector<T>& x, const std::vector<T>& yref,
                           int n, int bs, int m) {
          EXPECT_TRUE(tgt != nullptr);
          EXPECT_EQ(yref.size(), x.size());
          EXPECT_EQ(x.size(), static_cast<size_t>(n * bs));
          const T* x_data = x.data();
          const T* yref_data = yref.data();
          std::vector<T> ytgt(n * bs);
          T* ytgt_data = ytgt.data();
          // test normal
          tgt(x_data, ytgt_data, n, bs, m);
          ExpectEQ<T>(ytgt_data, yref_data, n * bs);
          // test inplace x
          std::copy(x.begin(), x.end(), ytgt.begin());
          tgt(ytgt_data, ytgt_data, n, bs, m);
          ExpectEQ<T>(ytgt_data, yref_data, n * bs);
        };
        TestAllImpls<KernelTuple, PlaceType>(n, verifier, x, y, n, bs, m);
      }
    }
  }
}

template <typename KernelTuple, typename PlaceType>
void TestKernelStrideASum() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  for (int d : TestSizes()) {
    for (int m : {1, 2, 3}) {  // stride
      if (m > d || d % m != 0) {
        continue;
      }
      auto ref = jit::GetReferFunc<KernelTuple>();
      EXPECT_TRUE(ref != nullptr);
      std::vector<T> x(d);
      RandomVec<T>(d, x.data());
      T ref_res;
      ref(x.data(), &ref_res, d, m);

      auto verifier = [](const typename KernelTuple::func_type tgt,
                         const std::vector<T>& x, const T ref_res,
                         const int m) {
        EXPECT_TRUE(tgt != nullptr);
        T tgt_res;
        tgt(x.data(), &tgt_res, x.size(), m);
        ExpectEQ<T>(&tgt_res, &ref_res, 1);
      };
      TestAllImpls<KernelTuple, PlaceType>(d, verifier, x, ref_res, m);
    }
  }
}

template <typename KernelTuple, typename PlaceType>
void TestKernelStrideScal() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  for (int d : TestSizes()) {
    for (int m : {1, 2, 3}) {  // stride
      if (m > d || d % m != 0) {
        continue;
      }
      auto ref = jit::GetReferFunc<KernelTuple>();
      EXPECT_TRUE(ref != nullptr);

      const T a = static_cast<T>(3);
      std::vector<T> x(d), yref(d);
      std::vector<T> xinp(d);  // inplace test
      RandomVec<T>(d, x.data());
      std::copy(x.begin(), x.end(), xinp.begin());

      const T* x_data = x.data();
      T* yref_data = yref.data();
      T* xinp_data = xinp.data();
      // test refer code inplace
      ref(&a, x_data, yref_data, d, m);
      ref(&a, xinp_data, xinp_data, d, m);
      ExpectEQ<T>(xinp_data, yref_data, d);

      auto verifier = [](const typename KernelTuple::func_type tgt, const T a,
                         const std::vector<T>& x, const std::vector<T>& yref,
                         const int m) {
        EXPECT_TRUE(tgt != nullptr);
        EXPECT_EQ(yref.size(), x.size());
        const T* x_data = x.data();
        const T* yref_data = yref.data();
        const int d = yref.size();
        std::vector<T> ytgt(d);
        T* ytgt_data = ytgt.data();
        // test normal
        tgt(&a, x_data, ytgt_data, d, m);
        ExpectEQ<T>(ytgt_data, yref_data, d);
        // test inplace x
        std::copy(x.begin(), x.end(), ytgt.begin());
        tgt(&a, ytgt_data, ytgt_data, d, m);
        ExpectEQ<T>(ytgt_data, yref_data, d);
      };
      TestAllImpls<KernelTuple, PlaceType>(d, verifier, a, x, yref, m);
    }
  }
}

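// TestKernelSgd applies a sparse SGD update: grad holds rows_size rows of
// width grad_w, rows maps them to unique parameter rows, and only those
// selected rows are expected to change, both out-of-place and in place.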
template <typename KernelTuple, typename PlaceType>
void TestKernelSgd() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  const T lr = 0.1;
  auto UnDuplicatedRandomVec = [](int n, const int64_t lower,
                                  const int64_t upper) -> std::vector<int64_t> {
    PADDLE_ENFORCE_LE(static_cast<size_t>(upper - lower), n - 1,
                      paddle::platform::errors::InvalidArgument(
                          "The range of Sgd (upper - lower) should be no "
                          "larger than n-1 (Sgd size -1). But upper - lower "
                          "is %d and n-1 is %d.",
                          static_cast<size_t>(upper - lower), n - 1));
    PADDLE_ENFORCE_GT(
        n, 0, paddle::platform::errors::InvalidArgument(
                  "The Sgd size should be larger than 0. But the n is %d.", n));
    std::vector<int64_t> all, out;
    for (int i = 0; i < n; ++i) {
      all.push_back(i);
    }
    std::random_shuffle(all.begin(), all.end());
    out.insert(out.begin(), all.begin(), all.begin() + n);
    return out;
  };
  for (int param_h : {1, 10}) {
    for (int grad_w : TestSizes()) {
      std::vector<T> param(param_h * grad_w);
      std::vector<T> param_out(param_h * grad_w);
      RandomVec<T>(param_h * grad_w, param.data());
      const T* param_data = param.data();
      T* out_data = param_out.data();
      for (int rows_size = 1; rows_size <= param_h; ++rows_size) {
        std::vector<T> grad(rows_size * grad_w);
        std::vector<int64_t> rows =
            UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
        RandomVec<T>(rows_size * grad_w, grad.data());
        const int64_t* rows_data = rows.data();
        const T* grad_data = grad.data();
        auto ref = jit::GetReferFunc<KernelTuple>();
        EXPECT_TRUE(ref != nullptr);
        jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size);
        ref(&lr, param_data, grad_data, rows_data, out_data, &attr);

        // inplace test
        std::vector<T> inp(param.size());
        std::copy(param.begin(), param.end(), inp.begin());
        T* inp_data = inp.data();
        ref(&lr, inp_data, grad_data, rows_data, inp_data, &attr);
        // only the selected rows should be equal
        for (int i = 0; i < rows_size; ++i) {
          ExpectEQ<T>(inp_data + rows[i] * grad_w, out_data + rows[i] * grad_w,
                      grad_w);
        }

        auto verifier = [](
            const typename KernelTuple::func_type tgt, const T lr,
            const std::vector<T>& param, const std::vector<T>& grad,
            const std::vector<int64_t>& rows, const std::vector<T>& oref,
            const typename KernelTuple::attr_type& attr) {
          EXPECT_TRUE(tgt != nullptr);
          EXPECT_EQ(param.size(),
                    static_cast<size_t>(attr.param_height * attr.param_width));
          EXPECT_EQ(grad.size(),
                    static_cast<size_t>(attr.grad_height * attr.grad_width));
          EXPECT_EQ(rows.size(), static_cast<size_t>(attr.selected_rows_size));
          EXPECT_EQ(param.size(), oref.size());
          const T* param_data = param.data();
          const T* grad_data = grad.data();
          const int64_t* rows_data = rows.data();
          const T* oref_data = oref.data();

          std::vector<T> out(oref.size());
          T* o_data = out.data();
          tgt(&lr, param_data, grad_data, rows_data, o_data, &attr);
          // only the selected rows should be equal
          for (size_t i = 0; i < rows.size(); ++i) {
            ExpectEQ<T>(o_data + rows[i] * attr.grad_width,
                        oref_data + rows[i] * attr.grad_width, attr.grad_width);
          }

          // inplace
          std::copy(param.begin(), param.end(), out.begin());
          tgt(&lr, o_data, grad_data, rows_data, o_data, &attr);
          for (size_t i = 0; i < rows.size(); ++i) {
            ExpectEQ<T>(o_data + rows[i] * attr.grad_width,
                        oref_data + rows[i] * attr.grad_width, attr.grad_width);
          }
        };
        TestAllImpls<KernelTuple, PlaceType>(attr, verifier, lr, param, grad,
                                             rows, param_out, attr);
      }
    }
  }
}

template <typename KernelTuple, typename PlaceType>
void TestKernelVBroadcast() {
  using T = typename KernelTuple::data_type;
  VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
  for (int w : TestSizes()) {
    std::vector<T> x(w);
    RandomVec<T>(w, x.data());
    const T* x_data = x.data();
    for (int64_t h : {1, 2, 6}) {
      auto ref = jit::GetReferFunc<KernelTuple>();
      EXPECT_TRUE(ref != nullptr);
      std::vector<T> y(w * h);
      T* y_data = y.data();
      ref(x_data, y_data, h, w);

      auto verifier = [](const typename KernelTuple::func_type tgt,
                         const std::vector<T>& x, const std::vector<T>& yref,
                         const int64_t& h,
                         const typename KernelTuple::attr_type& attr) {
        EXPECT_TRUE(tgt != nullptr);
        EXPECT_EQ(x.size(), static_cast<size_t>(attr));
        EXPECT_EQ(yref.size(), x.size() * h);
        std::vector<T> y(yref.size());
        const T* x_data = x.data();
        const T* yref_data = yref.data();
        T* y_data = y.data();
        tgt(x_data, y_data, h, attr);
        ExpectEQ<T>(y_data, yref_data, yref.size());
      };
      TestAllImpls<KernelTuple, PlaceType>(static_cast<int64_t>(w), verifier,
                                           x, y, h, static_cast<int64_t>(w));
    }
  }
}

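// The remaining tests do not check numeric results; they cover the kernel
// pools (how many creators/kernels are registered), the helper APIs
// (GetAllCandidateKernels/Funcs, the KernelFuncs cache, pack_weights,
// attribute printing), and the jit code cache keys.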
// test pool
TEST(JITKernel_pool, jitcreator) {
  const auto& jitcreators = jit::JitCodeCreatorPool::Instance().AllCreators();
#if defined(_WIN32) || defined(__APPLE__) || defined(__OSX__)
  EXPECT_EQ(jitcreators.size(), 0UL);
#else
  EXPECT_EQ(jitcreators.size(), 25UL);
#endif
}

TEST(JITKernel_pool, jitpool) {
  // the jitpool is related to the attr
  const auto& kers = jit::JitCodePool<jit::kVAdd>().Instance().AllKernels();
  EXPECT_EQ(kers.size(), 0UL);
  jit::GetAllCandidateKernels<jit::VAddTuple<float>, CPUPlace>(3);
  // after calling GetAllCandidateKernels, the jitcode is created automatically
#if defined(_WIN32) || defined(__APPLE__) || defined(__OSX__)
  EXPECT_EQ(kers.size(), 0UL);
#else
  EXPECT_EQ(kers.size(), 1UL);
#endif
}

TEST(JITKernel_pool, more) {
  const auto& kers = jit::KernelPool::Instance().AllKernels();
  size_t target_num = 8;

#ifdef __AVX__
  target_num += 2;
#endif

#ifdef PADDLE_WITH_MKLML
  target_num += 12;
#endif

  EXPECT_EQ(kers.size(), target_num);
}

TEST(JITKernel_pool, refer) {
  const auto& kers = jit::ReferKernelPool::Instance().AllKernels();
  EXPECT_EQ(kers.size(), 31UL);
}

// test helper
TEST(JITKernel_helper, GetAllCandidateKernels) {
  auto fp_kers =
      jit::GetAllCandidateKernels<jit::VExpTuple<float>, CPUPlace>(10);
#if defined(_WIN32) || defined(__APPLE__) || defined(__OSX__)
  EXPECT_GE(fp_kers.size(), 1UL);  // refer
#else
#ifdef PADDLE_WITH_MKLML
  EXPECT_GE(fp_kers.size(), 3UL);  // jitcode, mkl, refer
#else
  EXPECT_GE(fp_kers.size(), 2UL);  // jitcode, refer
#endif
#endif

  auto db_kers =
      jit::GetAllCandidateKernels<jit::VExpTuple<double>, CPUPlace>(10);
#if defined(_WIN32) || defined(__APPLE__) || defined(__OSX__)
  EXPECT_GE(db_kers.size(), 1UL);  // refer
#else
#ifdef PADDLE_WITH_MKLML
  EXPECT_GE(db_kers.size(), 2UL);  // mkl, refer
#else
  EXPECT_GE(db_kers.size(), 1UL);  // refer
#endif
#endif
}

TEST(JITKernel_helper, GetAllCandidateFuncsWithTypes) {
  auto fp_kers =
      jit::GetAllCandidateFuncsWithTypes<jit::VExpTuple<float>, CPUPlace>(10);
#if defined(__APPLE__) || defined(__OSX__)
  EXPECT_GE(fp_kers.size(), 1UL);  // refer
#else
#if !defined(PADDLE_WITH_MKLML) || defined(_WIN32)
  EXPECT_GE(fp_kers.size(), 2UL);  // jitcode/mkl, refer
#else
  EXPECT_GE(fp_kers.size(), 3UL);  // jitcode, mkl, refer
#endif
#endif

  auto db_kers =
      jit::GetAllCandidateFuncsWithTypes<jit::VExpTuple<double>, CPUPlace>(10);
#if defined(__APPLE__) || defined(__OSX__) || !defined(PADDLE_WITH_MKLML)
  EXPECT_GE(db_kers.size(), 1UL);  // refer
#else
  EXPECT_GE(db_kers.size(), 2UL);  // mkl, refer
#endif
}

TEST(JITKernel_helper, KernelFuncs) {
  auto f1 = jit::KernelFuncs<jit::VAddTuple<float>, CPUPlace>::Cache().At(3);
  auto f2 = jit::KernelFuncs<jit::VAddTuple<float>, CPUPlace>::Cache()[3];
  EXPECT_TRUE(f1 != nullptr);
  EXPECT_TRUE(f1 == f2);

  auto f3 = jit::KernelFuncs<jit::VAddTuple<float>, CPUPlace>::Cache()[5];
#if defined(_WIN32) || defined(__APPLE__) || defined(__OSX__)
  EXPECT_TRUE(f2 == f3);
#else
  EXPECT_TRUE(f2 != f3);
#endif
}

TEST(JITKernel_helper, GetAllCandidateFuncs) {
  auto funcs = jit::GetAllCandidateFuncs<jit::VExpTuple<float>, CPUPlace>(10);
  auto kers = jit::GetAllCandidateKernels<jit::VExpTuple<float>, CPUPlace>(10);
  EXPECT_EQ(funcs.size(), kers.size());

  std::vector<float> x(10), tgt(10);
  RandomVec<float>(10, x.data());
  auto best = jit::GetDefaultBestFunc<jit::VExpTuple<float>, CPUPlace>(10);
  best(x.data(), tgt.data(), 10);
  for (auto f : funcs) {
    std::vector<float> y(10);
    f(x.data(), y.data(), 10);
    ExpectEQ<float>(y.data(), tgt.data(), 10);
  }
}

TEST(JITKernel_helper, pack_weights) {
  const int N = 8 * 60, K = 2;
  float src[K][N], yref[K][N], y[K * N];
  float* x = &(src[0][0]);
  float* ref = &(yref[0][0]);
  for (int i = 0; i < N * K; ++i) {
    *(x + i) = static_cast<float>(i);
  }
  int block = 0;
  std::vector<int> groups;
  if (paddle::platform::MayIUse(paddle::platform::avx512f)) {
    block = ZMM_FLOAT_BLOCK;
    groups.push_back(30);
  } else {
    block = YMM_FLOAT_BLOCK;
    groups.insert(groups.end(), {14, 14, 14, 14, 4});
  }

  int offset = 0;
  int acc = 0;
  for (int g : groups) {
    g = g * block;
    for (int k = 0; k < K; ++k) {
      for (int i = 0; i < g; ++i) {
        *(ref + offset) = src[k][i + acc];
        offset++;
      }
    }
    acc += g;
  }

  jit::pack_weights<float>(x, y, N, K);
  ExpectEQ<float>(y, ref, N * K);
}

TEST(JITKernel_helper, attr) {
  std::ostringstream out;
  // KernelTypes
  out << jit::to_string(jit::kNone) << jit::to_string(jit::kCRFDecoding)
      << jit::to_string(jit::kEmbSeqPool) << jit::to_string(jit::kGRUH1)
      << jit::to_string(jit::kGRUHtPart1) << jit::to_string(jit::kGRUHtPart2)
      << jit::to_string(jit::kHSum) << jit::to_string(jit::kHMax)
      << jit::to_string(jit::kLSTMCtHt) << jit::to_string(jit::kLSTMC1H1)
      << jit::to_string(jit::kLayerNorm) << jit::to_string(jit::kMatMul)
      << jit::to_string(jit::kNCHW16CMulNC) << jit::to_string(jit::kSeqPool)
      << jit::to_string(jit::kSoftmax) << jit::to_string(jit::kVAdd)
      << jit::to_string(jit::kVAddBias) << jit::to_string(jit::kVAddRelu)
      << jit::to_string(jit::kVBroadcast) << jit::to_string(jit::kVCopy)
      << jit::to_string(jit::kVExp) << jit::to_string(jit::kVIdentity)
      << jit::to_string(jit::kVMul) << jit::to_string(jit::kVRelu)
      << jit::to_string(jit::kVScal) << jit::to_string(jit::kSgd)
      << jit::to_string(jit::kVSigmoid) << jit::to_string(jit::kVSquare)
      << jit::to_string(jit::kVSub) << jit::to_string(jit::kVTanh);
  EXPECT_EQ(out.str().size(), 234UL);

  // SeqPoolTypes
  out.str("");
  out << jit::to_string(jit::kSum) << jit::to_string(jit::kAvg)
      << jit::to_string(jit::kSqrt);
  EXPECT_EQ(out.str().size(), 13UL);

  EXPECT_EQ(jit::to_kerneltype("relu"), jit::kVRelu);
  EXPECT_EQ(jit::to_kerneltype("Identity"), jit::kVIdentity);
  EXPECT_EQ(jit::to_kerneltype("VEXP"), jit::kVExp);
  EXPECT_EQ(jit::to_kerneltype("SigmoiD"), jit::kVSigmoid);
  EXPECT_EQ(jit::to_kerneltype("VTanh"), jit::kVTanh);

  out.str("");
  out << jit::lstm_attr_t(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
  EXPECT_EQ(out.str().size(), 89UL);

  out.str("");
  out << jit::gru_attr_t(8, jit::kVIdentity, jit::kVSigmoid);
  EXPECT_EQ(out.str().size(), 52UL);

  out.str("");
  out << jit::seq_pool_attr_t(8, jit::SeqPoolType::kSum);
  EXPECT_EQ(out.str().size(), 44UL);

  out.str("");
  out << jit::emb_seq_pool_attr_t(1, 2, 3, 4, 5, jit::SeqPoolType::kAvg);
  EXPECT_EQ(out.str().size(), 93UL);

  out.str("");
  out << jit::sgd_attr_t(1, 2, 3, 4, 5);
  EXPECT_EQ(out.str().size(), 81UL);

  out.str("");
  out << jit::matmul_attr_t(1, 2, 3);
  EXPECT_EQ(out.str().size(), 14UL);
}

// test keys
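// JitCodeKey maps a kernel attribute to the cache key of its generated code.
// The expectations below suggest that only the attribute fields which change
// the generated code feed into the key (e.g. the seq_pool key ignores h, the
// emb_seq_pool key depends only on table_width, and the sgd key only on
// grad_width).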
TEST(JITKernel_key, int) {
  EXPECT_TRUE(jit::JitCodeKey<int>(2) == jit::JitCodeKey<int>(2));
  EXPECT_TRUE(jit::JitCodeKey<int>(2) == jit::JitCodeKey<int64_t>(2));
  EXPECT_TRUE(jit::JitCodeKey<int>(2) != jit::JitCodeKey<int>(3));
}

TEST(JITKernel_key, gru) {
  jit::gru_attr_t attr1(8, jit::kVSigmoid, jit::kVTanh);
  jit::gru_attr_t attr2(8, jit::kVSigmoid, jit::kVTanh);
  jit::gru_attr_t attr3(9, jit::kVSigmoid, jit::kVTanh);
  jit::gru_attr_t attr4(9, jit::kVSigmoid, jit::kVIdentity);
  jit::gru_attr_t attr5(9, jit::kVTanh, jit::kVIdentity);

  auto key1 = jit::JitCodeKey<jit::gru_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::gru_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::gru_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::gru_attr_t>(attr4);
  auto key5 = jit::JitCodeKey<jit::gru_attr_t>(attr5);

  EXPECT_TRUE(key1 == key2);
  EXPECT_TRUE(key2 != key3);
  EXPECT_TRUE(key2 != key4);
  EXPECT_TRUE(key2 != key5);
  EXPECT_TRUE(key3 != key4);
  EXPECT_TRUE(key3 != key5);
  EXPECT_TRUE(key4 != key5);
}

TEST(JITKernel_key, lstm) {
  jit::lstm_attr_t attr1(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
  jit::lstm_attr_t attr2(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
  jit::lstm_attr_t attr3(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
  jit::lstm_attr_t attr4(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh);
  jit::lstm_attr_t attr5(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh, true);
  jit::lstm_attr_t attr6(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh, true);

  auto key1 = jit::JitCodeKey<jit::lstm_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::lstm_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::lstm_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::lstm_attr_t>(attr4);
  auto key5 = jit::JitCodeKey<jit::lstm_attr_t>(attr5);
  auto key6 = jit::JitCodeKey<jit::lstm_attr_t>(attr6);

  EXPECT_TRUE(key1 == key2);
  EXPECT_TRUE(key2 != key3);
  EXPECT_TRUE(key2 != key4);
  EXPECT_TRUE(key2 != key5);
  EXPECT_TRUE(key3 != key4);
  EXPECT_TRUE(key3 != key5);
  EXPECT_TRUE(key4 != key5);
  EXPECT_TRUE(key5 == key6);
}

TEST(JITKernel_key, seq_pool) {
  jit::seq_pool_attr_t attr1(2, jit::SeqPoolType::kSum, 1);
  jit::seq_pool_attr_t attr2(2, jit::SeqPoolType::kSum, 3);
  jit::seq_pool_attr_t attr3(3, jit::SeqPoolType::kSum, 3);
  jit::seq_pool_attr_t attr4(3, jit::SeqPoolType::kAvg, 3);

  auto key1 = jit::JitCodeKey<jit::seq_pool_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::seq_pool_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::seq_pool_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::seq_pool_attr_t>(attr4);

  EXPECT_TRUE(key1 == key2);
  EXPECT_TRUE(key2 != key3);
  EXPECT_TRUE(key2 != key4);
  EXPECT_TRUE(key3 != key4);
}

TEST(JITKernel_key, matmul) {
  jit::matmul_attr_t attr1(1, 2, 3);
  jit::matmul_attr_t attr2(1, 2, 3);
  jit::matmul_attr_t attr3(1, 3, 3);
  jit::matmul_attr_t attr4(2, 3, 4);

  auto key1 = jit::JitCodeKey<jit::matmul_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::matmul_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::matmul_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::matmul_attr_t>(attr4);

  EXPECT_TRUE(key1 == key2);
  EXPECT_TRUE(key2 != key3);
  EXPECT_TRUE(key2 != key4);
  EXPECT_TRUE(key3 != key4);
}

TEST(JITKernel_key, emb_seq_pool) {
  jit::emb_seq_pool_attr_t attr1(1, 2, 3, 4, 5, jit::SeqPoolType::kSum);
  jit::emb_seq_pool_attr_t attr2(1, 2, 3, 4, 5, jit::SeqPoolType::kSum);
  jit::emb_seq_pool_attr_t attr3(10, 2, 9, 8, 7, jit::SeqPoolType::kAvg);
  jit::emb_seq_pool_attr_t attr4(10, 3, 9, 8, 7, jit::SeqPoolType::kSum);
  jit::emb_seq_pool_attr_t attr5(1, 6, 3, 4, 5, jit::SeqPoolType::kSum);

  auto key1 = jit::JitCodeKey<jit::emb_seq_pool_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::emb_seq_pool_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::emb_seq_pool_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::emb_seq_pool_attr_t>(attr4);
  auto key5 = jit::JitCodeKey<jit::emb_seq_pool_attr_t>(attr5);

  EXPECT_TRUE(key1 == key2);
  EXPECT_TRUE(key2 == key3);
  EXPECT_TRUE(key2 != key4);
  EXPECT_TRUE(key2 != key5);
  EXPECT_TRUE(key4 != key5);
}

TEST(JITKernel_key, sgd) {
  jit::sgd_attr_t attr1(1, 2, 3, 4, 5);
  jit::sgd_attr_t attr2(1, 2, 3, 4, 5);
  jit::sgd_attr_t attr3(9, 8, 7, 4, 6);
  jit::sgd_attr_t attr4(1, 2, 3, 6, 5);
  jit::sgd_attr_t attr5(10, 9, 8, 7, 6);

  auto key1 = jit::JitCodeKey<jit::sgd_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::sgd_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::sgd_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::sgd_attr_t>(attr4);
  auto key5 = jit::JitCodeKey<jit::sgd_attr_t>(attr5);

  EXPECT_TRUE(key1 == key2);
  EXPECT_TRUE(key2 == key3);
  EXPECT_TRUE(key3 != key4);
  EXPECT_TRUE(key3 != key5);
  EXPECT_TRUE(key4 != key5);
}

// test kernels
#define TestKernelVMul TestKernelXYZN
#define TestKernelVAdd TestKernelXYZN
#define TestKernelVAddRelu TestKernelXYZN
#define TestKernelVSub TestKernelXYZN

#define TestKernelVScal TestKernelAXYN
#define TestKernelVAddBias TestKernelAXYN

#define TestKernelVRelu TestKernelXYN
#define TestKernelVIdentity TestKernelXYN
#define TestKernelVSquare TestKernelXYN
#define TestKernelVExp TestKernelXYN
#define TestKernelVSigmoid TestKernelXYN
#define TestKernelVTanh TestKernelXYN
#define TestKernelVCopy TestKernelXYN

#define TestKernelHMax TestKernelXRN
#define TestKernelHSum TestKernelXRN

#define TestKernelLSTMCtHt TestKernelLSTM
#define TestKernelLSTMC1H1 TestKernelLSTM

#define TestKernelGRUH1 TestKernelGRU
#define TestKernelGRUHtPart1 TestKernelGRU
#define TestKernelGRUHtPart2 TestKernelGRU

#define TEST_CPU_KERNEL(kernel_type)                                      \
  TEST(JITKernel, kernel_type) {                                          \
    TestKernel##kernel_type<jit::kernel_type##Tuple<float>, CPUPlace>();  \
    TestKernel##kernel_type<jit::kernel_type##Tuple<double>, CPUPlace>(); \
  }

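// For example, TEST_CPU_KERNEL(VMul) below expands to TEST(JITKernel, VMul),
// which runs TestKernelXYZN<jit::VMulTuple<float>, CPUPlace>() and the
// double instantiation, per the aliases defined above.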
TEST_CPU_KERNEL(VMul);
TEST_CPU_KERNEL(VAdd);
TEST_CPU_KERNEL(VAddRelu);
TEST_CPU_KERNEL(VSub);

TEST_CPU_KERNEL(VScal);
TEST_CPU_KERNEL(VAddBias);

TEST_CPU_KERNEL(VRelu);
TEST_CPU_KERNEL(VIdentity);
TEST_CPU_KERNEL(VSquare);
TEST_CPU_KERNEL(VExp);
TEST_CPU_KERNEL(VSigmoid);
TEST_CPU_KERNEL(VTanh);
TEST_CPU_KERNEL(VCopy);

TEST_CPU_KERNEL(HMax);
TEST_CPU_KERNEL(HSum);

TEST_CPU_KERNEL(LSTMCtHt);
TEST_CPU_KERNEL(LSTMC1H1);

TEST_CPU_KERNEL(GRUH1);
TEST_CPU_KERNEL(GRUHtPart1);
TEST_CPU_KERNEL(GRUHtPart2);

TEST_CPU_KERNEL(NCHW16CMulNC);
TEST_CPU_KERNEL(LayerNorm);
TEST_CPU_KERNEL(CRFDecoding);

TEST_CPU_KERNEL(SeqPool);
TEST_CPU_KERNEL(EmbSeqPool);
TEST_CPU_KERNEL(MatMul);
TEST_CPU_KERNEL(Softmax);
TEST_CPU_KERNEL(Sgd);
TEST_CPU_KERNEL(VBroadcast);

TEST_CPU_KERNEL(StrideASum);
TEST_CPU_KERNEL(StrideScal);