!14584 gpu trt converter
From: @wilfchen Reviewed-by: @limingqi107,@cristoval Signed-off-by:pull/14584/MERGE
commit
675661726b
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,89 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_OPTITIMIZER_TRT_CONVERTER_CONTEXT_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_OPTITIMIZER_TRT_CONVERTER_CONTEXT_H_
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <NvInfer.h>
|
||||
#include "base/base.h"
|
||||
#include "ir/anf.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/optimizer/trt_pass/layer_input.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
class TrtConverterContext : public std::enable_shared_from_this<TrtConverterContext> {
|
||||
public:
|
||||
explicit TrtConverterContext(FuncGraphPtr fg)
|
||||
: func_graph_(fg),
|
||||
batch_size_(1),
|
||||
workspace_size_(4UL << 30),
|
||||
builder_(nullptr),
|
||||
network_(nullptr),
|
||||
config_(nullptr),
|
||||
engine_(nullptr) {}
|
||||
~TrtConverterContext() = default;
|
||||
|
||||
bool Init();
|
||||
|
||||
// Parser KernelGraph to trt graph
|
||||
bool Parser();
|
||||
|
||||
// Serialize trt models.
|
||||
bool Serialize(std::string *model);
|
||||
|
||||
// Get trt graph inputs without weights. The inputs keep same order as binding name.
|
||||
std::vector<AnfNodePtr> GetGraphInputs();
|
||||
|
||||
// Get trt graph outputs. All outputs are flatten to vector with concret shape.
|
||||
std::vector<session::KernelWithIndex> GetGraphOutputs();
|
||||
|
||||
// Store trt layer outputs to the cache.
|
||||
bool StoreLayerOutput(const AnfNodePtr &node, const std::vector<LayerInput> &inputs);
|
||||
|
||||
// Get trt layer inputs from the cache.
|
||||
bool LoadLayerInput(const AnfNodePtr &node, std::vector<LayerInput> *inputs);
|
||||
|
||||
// Create and keep temporary weight, as constant folding demanding new weight excluded in graph,
|
||||
// which should release until building finish.
|
||||
std::shared_ptr<tensor::Tensor> CreateTempWeight(const TypeId &type, const std::vector<size_t> &shape);
|
||||
|
||||
std::shared_ptr<nvinfer1::INetworkDefinition> network() const { return network_; }
|
||||
|
||||
private:
|
||||
bool InitInputTable();
|
||||
bool InitValueNodeTable();
|
||||
|
||||
FuncGraphPtr func_graph_;
|
||||
uint32_t batch_size_;
|
||||
size_t workspace_size_;
|
||||
std::shared_ptr<nvinfer1::IBuilder> builder_;
|
||||
std::shared_ptr<nvinfer1::INetworkDefinition> network_;
|
||||
std::shared_ptr<nvinfer1::IBuilderConfig> config_;
|
||||
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
|
||||
|
||||
// Cache (AnfNode + output_index : ILayer output).
|
||||
std::unordered_map<AnfNodePtr, std::unordered_map<size_t, LayerInput>> output_map_;
|
||||
std::vector<std::shared_ptr<tensor::Tensor>> temp_weights_;
|
||||
};
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
||||
|
||||
#endif  // MINDSPORE_CCSRC_BACKEND_OPTITIMIZER_TRT_CONVERTER_CONTEXT_H_
|
Loading…
Reference in new issue