|
|
|
@ -18,26 +18,36 @@
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
|
|
|
|
|
|
PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
|
|
|
|
|
template <typename Target>
|
|
|
|
|
PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
|
|
|
|
|
const AnakinConfig &config) {
|
|
|
|
|
CHECK(Init(config));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
|
|
|
|
|
template <typename Target>
|
|
|
|
|
bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
|
|
|
|
|
if (!(graph_.load(config.model_file))) {
|
|
|
|
|
LOG(FATAL) << "fail to load graph from " << config.model_file;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
graph_.ResetBatchSize("input_0", config.max_batch_size);
|
|
|
|
|
auto inputs = graph_.get_ins();
|
|
|
|
|
for (auto &input_str : inputs) {
|
|
|
|
|
graph_.ResetBatchSize(input_str, config.max_batch_size);
|
|
|
|
|
}
|
|
|
|
|
// optimization for graph
|
|
|
|
|
if (!(graph_.Optimize())) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
// construct executer
|
|
|
|
|
executor_.init(graph_);
|
|
|
|
|
if (executor_p_ == nullptr) {
|
|
|
|
|
executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
|
|
|
|
|
anakin::Precision::FP32>(graph_, true);
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool PaddleInferenceAnakinPredictor::Run(
|
|
|
|
|
template <typename Target>
|
|
|
|
|
bool PaddleInferenceAnakinPredictor<Target>::Run(
|
|
|
|
|
const std::vector<PaddleTensor> &inputs,
|
|
|
|
|
std::vector<PaddleTensor> *output_data, int batch_size) {
|
|
|
|
|
for (const auto &input : inputs) {
|
|
|
|
@ -46,7 +56,29 @@ bool PaddleInferenceAnakinPredictor::Run(
|
|
|
|
|
<< "'s type is not float";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
auto d_tensor_in_p = executor_.get_in(input.name);
|
|
|
|
|
auto d_tensor_in_p = executor_p_->get_in(input.name);
|
|
|
|
|
auto net_shape = d_tensor_in_p->valid_shape();
|
|
|
|
|
if (net_shape.size() != input.shape.size()) {
|
|
|
|
|
LOG(ERROR) << " input " << input.name
|
|
|
|
|
<< "'s shape size should be equal to that of net";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
int sum = 1;
|
|
|
|
|
for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
|
|
|
|
|
if (sum > net_shape.count()) {
|
|
|
|
|
graph_.Reshape(input.name, input.shape);
|
|
|
|
|
delete executor_p_;
|
|
|
|
|
executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
|
|
|
|
|
anakin::Precision::FP32>(graph_, true);
|
|
|
|
|
d_tensor_in_p = executor_p_->get_in(input.name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
anakin::saber::Shape tmp_shape;
|
|
|
|
|
for (auto s : input.shape) {
|
|
|
|
|
tmp_shape.push_back(s);
|
|
|
|
|
}
|
|
|
|
|
d_tensor_in_p->reshape(tmp_shape);
|
|
|
|
|
|
|
|
|
|
float *d_data_p = d_tensor_in_p->mutable_data();
|
|
|
|
|
if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
|
|
|
|
|
d_tensor_in_p->valid_size() * sizeof(float),
|
|
|
|
@ -56,16 +88,17 @@ bool PaddleInferenceAnakinPredictor::Run(
|
|
|
|
|
}
|
|
|
|
|
cudaStreamSynchronize(NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
executor_.prediction();
|
|
|
|
|
cudaDeviceSynchronize();
|
|
|
|
|
executor_p_->prediction();
|
|
|
|
|
cudaDeviceSynchronize();
|
|
|
|
|
|
|
|
|
|
if (output_data->empty()) {
|
|
|
|
|
LOG(ERROR) << "At least one output should be set with tensors' names.";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
for (auto &output : *output_data) {
|
|
|
|
|
auto *tensor = executor_.get_out(output.name);
|
|
|
|
|
output.shape = tensor->shape();
|
|
|
|
|
auto *tensor = executor_p_->get_out(output.name);
|
|
|
|
|
output.shape = tensor->valid_shape();
|
|
|
|
|
if (output.data.length() < tensor->valid_size() * sizeof(float)) {
|
|
|
|
|
output.data.Resize(tensor->valid_size() * sizeof(float));
|
|
|
|
|
}
|
|
|
|
@ -81,19 +114,23 @@ bool PaddleInferenceAnakinPredictor::Run(
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
|
|
|
|
|
&PaddleInferenceAnakinPredictor::get_executer() {
|
|
|
|
|
return executor_;
|
|
|
|
|
template <typename Target>
|
|
|
|
|
anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
|
|
|
|
|
&PaddleInferenceAnakinPredictor<Target>::get_executer() {
|
|
|
|
|
return *executor_p_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// the cloned new Predictor of anakin share the same net weights from original
|
|
|
|
|
// Predictor
|
|
|
|
|
std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
|
|
|
|
|
template <typename Target>
|
|
|
|
|
std::unique_ptr<PaddlePredictor>
|
|
|
|
|
PaddleInferenceAnakinPredictor<Target>::Clone() {
|
|
|
|
|
VLOG(3) << "Anakin Predictor::clone";
|
|
|
|
|
std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
|
|
|
|
|
std::unique_ptr<PaddlePredictor> cls(
|
|
|
|
|
new PaddleInferenceAnakinPredictor<Target>());
|
|
|
|
|
// construct executer from other graph
|
|
|
|
|
auto anakin_predictor_p =
|
|
|
|
|
dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
|
|
|
|
|
dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
|
|
|
|
|
if (!anakin_predictor_p) {
|
|
|
|
|
LOG(ERROR) << "fail to call Init";
|
|
|
|
|
return nullptr;
|
|
|
|
@ -103,14 +140,28 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
|
|
|
|
|
return std::move(cls);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template class PaddleInferenceAnakinPredictor<anakin::NV>;
|
|
|
|
|
template class PaddleInferenceAnakinPredictor<anakin::X86>;
|
|
|
|
|
|
|
|
|
|
// A factory to help create difference predictor.
|
|
|
|
|
template <>
|
|
|
|
|
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
|
|
|
|
|
AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
|
|
|
|
|
VLOG(3) << "Anakin Predictor create.";
|
|
|
|
|
std::unique_ptr<PaddlePredictor> x(
|
|
|
|
|
new PaddleInferenceAnakinPredictor(config));
|
|
|
|
|
return x;
|
|
|
|
|
}
|
|
|
|
|
if (config.target_type == AnakinConfig::NVGPU) {
|
|
|
|
|
VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
|
|
|
|
|
std::unique_ptr<PaddlePredictor> x(
|
|
|
|
|
new PaddleInferenceAnakinPredictor<anakin::NV>(config));
|
|
|
|
|
return x;
|
|
|
|
|
} else if (config.target_type == AnakinConfig::X86) {
|
|
|
|
|
VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
|
|
|
|
|
std::unique_ptr<PaddlePredictor> x(
|
|
|
|
|
new PaddleInferenceAnakinPredictor<anakin::X86>(config));
|
|
|
|
|
return x;
|
|
|
|
|
} else {
|
|
|
|
|
VLOG(3) << "Anakin Predictor create on unknown platform.";
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
} // namespace paddle
|
|
|
|
|