// Per-test fixture setup: construct a fresh engine instance for each test.
void TestAnakinEngine::SetUp() {
  // Build an FP32 engine on the NVIDIA (NV) backend.
  // NOTE(review): the boolean ctor argument is unnamed here — confirm its
  // meaning against AnakinEngine's declaration before relying on it.
  engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
}
// Builds a minimal Anakin graph (a single Dense/FC op "op1" mapping input
// "x" to output "y"), loads a 1x1x1x2 weight whose first element is 2.0,
// runs one 1x1x1x1 input through the engine, and logs the output values.
//
// NOTE(review): the source contained TWO overlapping copies of this test
// (merge-conflict residue — the second TEST_F was opened inside the first,
// which cannot compile and would also be a duplicate test registration).
// They are consolidated into the single intended test below.
TEST_F(TestAnakinEngine, Execute) {
  // Graph definition: one fully-connected layer, no bias.
  engine_->AddOp("op1", "Dense", {"x"}, {"y"});
  engine_->AddOpAttr("op1", "out_dim", 2);
  engine_->AddOpAttr("op1", "bias_term", false);
  engine_->AddOpAttr("op1", "axis", 1);

  // Allocate the weight blob in Anakin's global graph memory pool.
  std::vector<int> shape = {1, 1, 1, 2};
  Shape tmp_shape(shape);
  // PBlock<NV> weight1(tmp_shape);
  auto *weight1 =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
  // auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);

  // Fill the host-side tensor, then mirror it into the device tensor.
  // NOTE(review): only element 0 is assigned; cpu_data[1] stays
  // uninitialized — confirm whether the second weight should be set too.
  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
  cpu_data[0] = 2.;
  weight1->d_tensor().set_shape(tmp_shape);
  weight1->d_tensor().copy_from(weight1->h_tensor());
  engine_->AddOpAttr("op1", "weight_1", *weight1);

  // Finalize the graph, fix the input shape, and optimize before running.
  engine_->Freeze();
  // PTuple<int> input_shape = {1};
  // engine_->AddOpAttr("x", "input_shape", input_shape);
  engine_->SetInputShape("x", {1, 1, 1, 1});
  engine_->Optimize();
  engine_->InitGraph();

  // Stage the single-element input x = {1.0} on the GPU.
  framework::LoDTensor x;
  framework::LoDTensor y;
  x.Resize({1, 1, 1, 1});
  y.Resize({1, 1, 1, 2});
  auto *x_data = x.mutable_data<float>(platform::CUDAPlace());
  float x_data_cpu[] = {1.};
  cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice);

  std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}};
  auto *y_data = y.mutable_data<float>(platform::CUDAPlace());
  std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}};

  engine_->Execute(inputs, outputs);

  // Copy the two output floats back to the host and log them.
  // NOTE(review): no numeric EXPECT/ASSERT here — the test only checks
  // that execution completes; consider asserting y_data_cpu[0] once the
  // expected value is pinned down.
  auto *y_data_gpu = y_data;
  float y_data_cpu[2];
  cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost);
  LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1];
}
}  // namespace anakin
}  // namespace inference
}  // namespace paddle