@@ -49,7 +49,7 @@ struct AnalysisConfig : public NativeConfig {
   bool use_feed_fetch_ops{true};
 
   void EnableTensorRtEngine(int workspace_size = 1 << 20,
-                            int max_batch_size = 1);
+                            int max_batch_size = 1, int min_subgraph_size = 3);
   bool use_tensorrt() const { return use_tensorrt_; }
 
   void EnableMKLDNN();
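
With the new third parameter, callers can tune at the call site how aggressively ops are grouped into TensorRT subgraphs. A minimal sketch of the updated call; the variable name and batch size are illustrative, not part of this diff:

    // Give TRT 1 MiB of workspace, build the engine for a max batch size
    // of 4, and only offload subgraphs containing at least 3 nodes.
    paddle::AnalysisConfig config;
    config.EnableTensorRtEngine(1 << 20 /* workspace_size */,
                                4 /* max_batch_size */,
                                3 /* min_subgraph_size */);
    // config.use_tensorrt() now returns true.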
@@ -69,8 +69,19 @@ struct AnalysisConfig : public NativeConfig {
   bool use_tensorrt_{false};
   bool use_mkldnn_{false};
   std::unordered_set<std::string> mkldnn_enabled_op_types_;
+  // For workspace_size, refer to:
+  // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
   int tensorrt_workspace_size_;
+  // While TensorRT allows an engine optimized for a given max batch size
+  // to run at any smaller size, the performance for those smaller sizes
+  // may not be as well-optimized. Therefore, max_batch_size is best set
+  // equal to the runtime batch size.
   int tensorrt_max_batchsize_;
+  // We transform the ops that can be converted into TRT layers in the model
+  // and aggregate them into subgraphs for TRT execution. This variable sets
+  // the minimum number of nodes such a subgraph must contain; the default
+  // is 3.
+  int tensorrt_min_subgraph_size_{3};
   std::unique_ptr<PassStrategy> pass_builder_;
   bool model_from_memory_{false};
 };
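
For reference, a minimal end-to-end sketch of how these options combine. The header path, the NativeConfig fields (model_dir, use_gpu, fraction_of_gpu_memory), and the CreatePaddlePredictor factory are assumptions about the surrounding inference API, not part of this diff:

    #include "paddle/fluid/inference/api/paddle_inference_api.h"  // assumed path

    int main() {
      paddle::AnalysisConfig config;
      config.model_dir = "./model";          // inherited from NativeConfig (assumed)
      config.use_gpu = true;                 // the TRT engine runs on the GPU path
      config.fraction_of_gpu_memory = 0.3f;  // assumed NativeConfig field
      // New in this change: min_subgraph_size as the third argument.
      config.EnableTensorRtEngine(1 << 20, 1, 3);
      auto predictor =
          paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
      // ... fill PaddleTensor inputs and call predictor->Run(...).
      return 0;
    }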