@@ -20,19 +20,22 @@
 
 namespace mindspore {
 
-void BoardcaseAdd(const float *a, const float b, float *c, const int size) {
+template <class T>
+static void BoardcaseAdd(const T *a, const T b, T *c, const int size) {
   for (int i = 0; i < size; i++) {
     c[i] = a[i] + b;
   }
 }
 
-void ElementAdd(const float *a, const float *b, float *c, const int size) {
+template <class T>
+static void ElementAdd(const T *a, const T *b, T *c, const int size) {
   for (int i = 0; i < size; i++) {
     c[i] = a[i] + b[i];
   }
 }
 
-bool DataCompare(const float *a, const float *b, const int size, const float accuracy = 1e-4) {
+template <class T>
+static bool DataCompare(const T *a, const T *b, const int size, const float accuracy = 1e-4) {
   for (int i = 0; i < size; i++) {
     auto diff = fabs(a[i] - b[i]);
     if (diff > accuracy) {
@@ -43,36 +46,40 @@ bool DataCompare(const float *a, const float *b, const int size, const float acc
   return true;
 }
 
-void InitData(void *data, const int size) {
-  float *data_float = reinterpret_cast<float *>(data);
+template <class T>
+static void InitData(void *data, const int size) {
+  T *data_float = reinterpret_cast<T *>(data);
   static unsigned int seed = 123;
   for (int i = 0; i < size; i++) {
     data_float[i] = static_cast<int>(rand_r(&seed)) % 100;
   }
 }
 
-void LogData(void *data, const int size, const std::string prefix) {
+template <class T>
+static void LogData(void *data, const int size, const std::string prefix) {
   std::cout << prefix;
-  float *data_float = reinterpret_cast<float *>(data);
+  T *data_float = reinterpret_cast<T *>(data);
   for (int i = 0; i < size; i++) {
     std::cout << data_float[i] << ",";
   }
   std::cout << std::endl;
 }
 
-void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) {
+template <class T>
+static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) {
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   auto allocator = ocl_runtime->GetAllocator();
 
   bool is_bias_add = shape_b.empty();
-  auto tensorType = lite::TensorCategory(schema::NodeType_ValueNode);
-
-  lite::Tensor *tensor_a =
-    new (std::nothrow) lite::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
-  lite::Tensor *tensor_b =
-    new (std::nothrow) lite::Tensor(kNumberTypeFloat32, shape_b, schema::Format_NHWC4, tensorType);
-  lite::Tensor *tensor_c =
-    new (std::nothrow) lite::Tensor(kNumberTypeFloat32, shape_a, schema::Format_NHWC4, tensorType);
+  auto data_type = kNumberTypeFloat32;
+  if (sizeof(T) == 2) {
+    data_type = kNumberTypeFloat16;
+    ocl_runtime->SetFp16Enable(true);
+  }
+
+  lite::Tensor *tensor_a = new (std::nothrow) lite::Tensor(data_type, shape_a, schema::Format_NHWC4);
+  lite::Tensor *tensor_b = new (std::nothrow) lite::Tensor(data_type, shape_b, schema::Format_NHWC4);
+  lite::Tensor *tensor_c = new (std::nothrow) lite::Tensor(data_type, shape_a, schema::Format_NHWC4);
   if (tensor_a == nullptr || tensor_b == nullptr || tensor_c == nullptr) {
     MS_LOG(ERROR) << "Create tensor failed!";
     delete tensor_a;
@@ -84,10 +91,10 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
   int64_t element_num = tensor_a->ElementsC4Num();
   int64_t element_num_b = is_bias_add ? 1 : tensor_b->ElementsC4Num();
 
-  float *data_a = new (std::nothrow) float[element_num];
-  float *data_b = new (std::nothrow) float[element_num_b];
-  float *data_c_cpu = new (std::nothrow) float[element_num];
-  float *data_c_ocl = new (std::nothrow) float[element_num];
+  T *data_a = new (std::nothrow) T[element_num];
+  T *data_b = new (std::nothrow) T[element_num_b];
+  T *data_c_cpu = new (std::nothrow) T[element_num];
+  T *data_c_ocl = new (std::nothrow) T[element_num];
   if (data_a == nullptr || data_b == nullptr || data_c_cpu == nullptr || data_c_ocl == nullptr) {
     MS_LOG(ERROR) << "Create buffer failed!";
     delete tensor_a;
@@ -100,12 +107,12 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
     return;
   }
 
-  InitData(data_a, element_num);
-  InitData(data_b, element_num_b);
-  memset(data_c_ocl, 0, sizeof(float) * element_num);
+  InitData<T>(data_a, element_num);
+  InitData<T>(data_b, element_num_b);
+  memset(data_c_ocl, 0, sizeof(T) * element_num);
 
   if (is_bias_add) {
-    BoardcaseAdd(data_a, static_cast<float *>(data_b)[0], data_c_cpu, element_num);
+    BoardcaseAdd(data_a, static_cast<T *>(data_b)[0], data_c_cpu, element_num);
   } else {
     ElementAdd(data_a, data_b, data_c_cpu, element_num);
   }
@@ -115,11 +122,12 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
     inputs.push_back(tensor_b);
   } else {
     tensor_b->MallocData();
-    memcpy(tensor_b->MutableData(), data_b, sizeof(float));
+    memcpy(tensor_b->MutableData(), data_b, sizeof(T));
   }
   std::vector<lite::Tensor *> outputs = {tensor_c};
 
   ArithmeticParameter *param = new (std::nothrow) ArithmeticParameter();
+  param->broadcasting_ = is_bias_add;
   if (param == nullptr) {
     MS_LOG(ERROR) << "Create parameter failed!";
     delete tensor_a;
@@ -170,19 +178,19 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
   }
   kernel->Init();
 
-  memcpy(inputs[0]->MutableData(), data_a, sizeof(float) * element_num);
+  memcpy(inputs[0]->MutableData(), data_a, sizeof(T) * element_num);
   if (!is_bias_add) {
-    memcpy(inputs[1]->MutableData(), data_b, sizeof(float) * element_num_b);
+    memcpy(inputs[1]->MutableData(), data_b, sizeof(T) * element_num_b);
   }
 
   kernel->Run();
 
-  memcpy(data_c_ocl, outputs[0]->MutableData(), sizeof(float) * element_num);
+  memcpy(data_c_ocl, outputs[0]->MutableData(), sizeof(T) * element_num);
 
-  LogData(data_a, 10, "Data A : ");
-  LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
-  LogData(data_c_cpu, 10, "Expect compute : ");
-  LogData(outputs[0]->MutableData(), 10, "OpenCL compute : ");
+  LogData<T>(data_a, 10, "Data A : ");
+  LogData<T>(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
+  LogData<T>(data_c_cpu, 10, "Expect compute : ");
+  LogData<T>(outputs[0]->MutableData(), 10, "OpenCL compute : ");
   bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num);
   MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
   EXPECT_EQ(true, cmp);
@@ -210,16 +218,27 @@ class TestArithmeticOpenCL : public mindspore::CommonTest {
   TestArithmeticOpenCL() {}
 };
 
-TEST_F(TestArithmeticOpenCL, AddElementwiseTest) {
+TEST_F(TestArithmeticOpenCL, AddElementwiseFP32) {
   const std::vector<int> &shape_a = {1, 1024, 1024, 4};
   const std::vector<int> &shape_b = {1, 1024, 1024, 4};
-  TestCase(shape_a, shape_b);
+  TestCase<float>(shape_a, shape_b);
 }
 
-TEST_F(TestArithmeticOpenCL, AddBroadcastTest) {
+TEST_F(TestArithmeticOpenCL, AddBroadcastFP32) {
   const std::vector<int> &shape_a = {1, 128, 128, 4};
   const std::vector<int> &shape_b = {};
-  TestCase(shape_a, shape_b);
+  TestCase<float>(shape_a, shape_b);
 }
 
+TEST_F(TestArithmeticOpenCL, AddElementwiseFP16) {
+  const std::vector<int> &shape_a = {1, 1024, 1024, 4};
+  const std::vector<int> &shape_b = {1, 1024, 1024, 4};
+  TestCase<float16_t>(shape_a, shape_b);
+}
+
+TEST_F(TestArithmeticOpenCL, AddBroadcastFP16) {
+  const std::vector<int> &shape_a = {1, 128, 128, 4};
+  const std::vector<int> &shape_b = {};
+  TestCase<float16_t>(shape_a, shape_b);
+}
 }  // namespace mindspore