|
|
|
@ -97,23 +97,7 @@ class AnakinEngineOp : public framework::OperatorBase {
|
|
|
|
|
if (param_names_.count(x)) continue;
|
|
|
|
|
auto &t =
|
|
|
|
|
inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
|
|
|
|
|
/*
|
|
|
|
|
auto t_shape = framework::vectorize(t.dims());
|
|
|
|
|
auto *anakin_input = engine->Net()->get_in(x);
|
|
|
|
|
auto net_shape = anakin_input->shape();
|
|
|
|
|
size_t anakin_net_input_size = net_shape.count() * sizeof(float);
|
|
|
|
|
size_t fluid_input_size = t.memory_size();
|
|
|
|
|
|
|
|
|
|
if (fluid_input_size < anakin_net_input_size) {
|
|
|
|
|
framework::LoDTensor temp_t;
|
|
|
|
|
auto t_dims = t.dims();
|
|
|
|
|
temp_t.Resize(t_dims);
|
|
|
|
|
TensorCopySync(t, dev_place, &temp_t);
|
|
|
|
|
t.Resize(framework::make_ddim(net_shape));
|
|
|
|
|
t.mutable_data<float>(dev_place);
|
|
|
|
|
TensorCopySync(temp_t, dev_place, &t);
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
inputs.insert({x, &t});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -136,6 +120,41 @@ class AnakinEngineOp : public framework::OperatorBase {
|
|
|
|
|
inference::Singleton<inference::anakin::AnakinEngineManager>::Global()
|
|
|
|
|
.Get(engine_key_);
|
|
|
|
|
}
|
|
|
|
|
// BUG here, detect that the tensor data pointer here will change sometime.
|
|
|
|
|
// Will fix it later.
|
|
|
|
|
/*
|
|
|
|
|
// For share with the tensor from fluid, We do the net init in the first net
|
|
|
|
|
precit.
|
|
|
|
|
if (!anakin_engine_->IsInit()) {
|
|
|
|
|
auto temp_max_input_shape = anakin_engine_->GetMaxInputShape();
|
|
|
|
|
anakin_engine_->AllocTmpMem();
|
|
|
|
|
for(auto& input : Inputs("Xs")) {
|
|
|
|
|
if (param_names_.count(input)) continue;
|
|
|
|
|
platform::CUDAPlace
|
|
|
|
|
gpu_place(boost::get<platform::CUDAPlace>(dev_place).device);
|
|
|
|
|
auto *input_var = scope.FindVar(input);
|
|
|
|
|
auto input_tensor = input_var->GetMutable<framework::LoDTensor>();
|
|
|
|
|
auto input_max_shape = temp_max_input_shape[input];
|
|
|
|
|
|
|
|
|
|
framework::LoDTensor temp_t;
|
|
|
|
|
auto t_dims = input_tensor->dims();
|
|
|
|
|
temp_t.Resize(t_dims);
|
|
|
|
|
TensorCopySync(*input_tensor, dev_place, &temp_t);
|
|
|
|
|
input_tensor->Resize(framework::make_ddim(input_max_shape));
|
|
|
|
|
input_tensor->mutable_data<float>(dev_place);
|
|
|
|
|
TensorCopySync(temp_t, dev_place, input_tensor);
|
|
|
|
|
|
|
|
|
|
auto* input_data = input_tensor->mutable_data<float>(gpu_place);
|
|
|
|
|
auto* anakin_input = anakin_engine_->Net()->get_in(input);
|
|
|
|
|
|
|
|
|
|
::anakin::saber::Tensor<::anakin::saber::NV>
|
|
|
|
|
tmp_anakin_tensor(input_data,
|
|
|
|
|
::anakin::saber::NV(), 0, input_max_shape);
|
|
|
|
|
anakin_input->share_from(tmp_anakin_tensor);
|
|
|
|
|
}
|
|
|
|
|
anakin_engine_->InitGraph();
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
return anakin_engine_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|