// Merged via PR #12449: add TensorRT elementwise (add) op converters.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include "paddle/fluid/framework/op_registry.h"
|
||||||
|
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace inference {
|
||||||
|
namespace tensorrt {
|
||||||
|
|
||||||
|
// Converts a fluid elementwise op whose Y operand is a persistable weight
// into a TensorRT IScaleLayer. Only the shift weights are populated below,
// so in practice this handles the elementwise *add*-with-weight case.
class ElementwiseWeightOpConverter : public OpConverter {
 public:
  ElementwiseWeightOpConverter() {}
  // op:        proto description of the fluid op; expects exactly one input
  //            "X", one input "Y" (the weight) and one output "Out".
  // scope:     scope holding the weight variable named by Input("Y").
  // test_mode: when true, declare the output so the unit-test framework can
  //            locate it.
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    // The nullptr looks strange; that's because framework::OpDesc's
    // constructor requires a block-desc argument we don't have here.
    framework::OpDesc op_desc(op, nullptr);
    LOG(INFO) << "convert a fluid elementwise op to tensorrt IScaleLayer";

    PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
    PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);  // Y is a weight
    PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

    auto* X = engine_->GetITensor(op_desc.Input("X").front());
    nvinfer1::Dims dims_x = X->getDimensions();
    // TensorRT dims here exclude the batch dimension, so X is expected to be
    // at least CHW-shaped.
    PADDLE_ENFORCE(dims_x.nbDims >= 3);

    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
    PADDLE_ENFORCE_NOT_NULL(Y_v);
    auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
    // Raw float weight data lives on the CPU; TensorRT copies it at build time.
    auto* weight_data = Y_t->mutable_data<float>(platform::CPUPlace());
    auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;

    std::vector<int> dims_y = framework::vectorize2int(Y_t->dims());
    // The fluid weight may carry one extra leading dim of size 1 (the batch
    // dim); drop it so dims_y is comparable with the batch-less dims_x.
    if (static_cast<int>(dims_y.size()) == dims_x.nbDims + 1) {
      if (dims_y[0] == 1) dims_y.erase(dims_y.begin());
    }

    // Decide the IScaleLayer broadcast mode from the weight shape:
    //  - a 1-D weight matching X's channel dim      -> per-channel (kCHANNEL)
    //  - a weight matching X's full shape           -> per-element (kELEMENTWISE)
    //  - shape (C, 1, 1, ...) with trailing ones    -> per-channel (kCHANNEL)
    // Anything else is unsupported.
    if (static_cast<int>(dims_y.size()) == 1 && dims_y[0] == dims_x.d[0]) {
      scale_mode = nvinfer1::ScaleMode::kCHANNEL;
    } else if (static_cast<int>(dims_y.size()) == dims_x.nbDims &&
               dims_y[0] == dims_x.d[0]) {
      scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
      for (int i = 1; i < dims_x.nbDims; i++) {
        if (dims_y[i] != dims_x.d[i]) {
          // Not a full-shape match; fall back to per-channel and verify below.
          scale_mode = nvinfer1::ScaleMode::kCHANNEL;
          break;
        }
      }
      if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
        // Per-channel is only valid if every non-channel dim of Y is 1.
        for (int i = 1; i < dims_x.nbDims; i++) {
          if (dims_y[i] != 1)
            PADDLE_THROW(
                "TensorRT unsupported weight shape for Elementwise op!");
        }
      }
    } else {
      PADDLE_THROW("TensorRT unsupported weight Shape for Elementwise op!");
    }

    // Only the shift term carries the weight; scale and power are left empty
    // (presumably treated as identity by IScaleLayer — confirm against the
    // TensorRT docs).
    TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT,
                                         static_cast<void*>(weight_data),
                                         Y_t->memory_size() / sizeof(float)};
    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                         0};
    TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                         0};

    nvinfer1::IScaleLayer* layer = TRT_ENGINE_ADD_LAYER(
        engine_, Scale, *const_cast<nvinfer1::ITensor*>(X), scale_mode,
        shift_weights.get(), scale_weights.get(), power_weights.get());
    auto output_name = op_desc.Output("Out")[0];
    engine_->SetITensor(output_name, layer->getOutput(0));
    if (test_mode) {  // the test framework can not determine which is the
                      // output, so place the declaration inside.
      engine_->DeclareOutput(output_name);
    }
  }
};
|
||||||
|
|
||||||
|
class ElementwiseTensorOpConverter : public OpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorOpConverter() {}
|
||||||
|
void operator()(const framework::proto::OpDesc& op,
|
||||||
|
const framework::Scope& scope, bool test_mode) override {
|
||||||
|
// Here the two nullptr looks strange, that's because the
|
||||||
|
// framework::OpDesc's constructor is strange.
|
||||||
|
framework::OpDesc op_desc(op, nullptr);
|
||||||
|
LOG(INFO) << "convert a fluid elementwise op to tensorrt IScaleLayer";
|
||||||
|
|
||||||
|
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
|
||||||
|
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
|
||||||
|
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
|
||||||
|
|
||||||
|
auto* X = engine_->GetITensor(op_desc.Input("X").front());
|
||||||
|
auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
|
||||||
|
nvinfer1::Dims dims_x = X->getDimensions();
|
||||||
|
nvinfer1::Dims dims_y = Y->getDimensions();
|
||||||
|
|
||||||
|
// The two input tensor should have the same dims
|
||||||
|
PADDLE_ENFORCE(dims_x.nbDims >= 3);
|
||||||
|
if (dims_x.nbDims == dims_y.nbDims) {
|
||||||
|
for (int i = 0; i < dims_x.nbDims; i++) {
|
||||||
|
if (dims_x.d[i] != dims_y.d[i])
|
||||||
|
PADDLE_THROW("TensorRT unsupported tensor shape for Elementwise op!");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PADDLE_THROW("TensorRT unsupported tensor shape for Elementwise op!");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto op_pair = ops.find(op_type_);
|
||||||
|
if (op_pair == ops.end()) {
|
||||||
|
PADDLE_THROW("Wrong elementwise op type!");
|
||||||
|
}
|
||||||
|
nvinfer1::IElementWiseLayer* layer = TRT_ENGINE_ADD_LAYER(
|
||||||
|
engine_, ElementWise, *const_cast<nvinfer1::ITensor*>(X),
|
||||||
|
*const_cast<nvinfer1::ITensor*>(Y), op_pair->second);
|
||||||
|
|
||||||
|
auto output_name = op_desc.Output("Out")[0];
|
||||||
|
engine_->SetITensor(output_name, layer->getOutput(0));
|
||||||
|
if (test_mode) { // the test framework can not determine which is the
|
||||||
|
// output, so place the declaration inside.
|
||||||
|
engine_->DeclareOutput(output_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
static const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
|
||||||
|
ops;
|
||||||
|
std::string op_type_;
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
|
||||||
|
ElementwiseTensorOpConverter::ops = {
|
||||||
|
{"add", nvinfer1::ElementWiseOperation::kSUM},
|
||||||
|
{"mul", nvinfer1::ElementWiseOperation::kPROD},
|
||||||
|
{"sub", nvinfer1::ElementWiseOperation::kSUB},
|
||||||
|
{"div", nvinfer1::ElementWiseOperation::kDIV},
|
||||||
|
{"min", nvinfer1::ElementWiseOperation::kMIN},
|
||||||
|
{"pow", nvinfer1::ElementWiseOperation::kPOW},
|
||||||
|
{"max", nvinfer1::ElementWiseOperation::kMAX},
|
||||||
|
};
|
||||||
|
|
||||||
|
class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class ElementwiseTensorMulOpConverter : public ElementwiseTensorOpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorMulOpConverter() { op_type_ = "mul"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class ElementwiseTensorSubOpConverter : public ElementwiseTensorOpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorSubOpConverter() { op_type_ = "sub"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class ElementwiseTensorDivOpConverter : public ElementwiseTensorOpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorDivOpConverter() { op_type_ = "div"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class ElementwiseTensorMinOpConverter : public ElementwiseTensorOpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorMinOpConverter() { op_type_ = "min"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class ElementwiseTensorMaxOpConverter : public ElementwiseTensorOpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorMaxOpConverter() { op_type_ = "max"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
|
||||||
|
public:
|
||||||
|
ElementwiseTensorPowOpConverter() { op_type_ = "pow"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace tensorrt
|
||||||
|
} // namespace inference
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
// Register the converters with the TensorRT converter registry. The
// "_weight"/"_tensor" suffix distinguishes whether the Y operand is a
// persistable weight or a runtime tensor; only "add" has a weight variant.
REGISTER_TRT_OP_CONVERTER(elementwise_add_weight, ElementwiseWeightOpConverter);

REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor,
                          ElementwiseTensorAddOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_sub_tensor,
                          ElementwiseTensorSubOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_div_tensor,
                          ElementwiseTensorDivOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_mul_tensor,
                          ElementwiseTensorMulOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_max_tensor,
                          ElementwiseTensorMaxOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_min_tensor,
                          ElementwiseTensorMinOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_pow_tensor,
                          ElementwiseTensorPowOpConverter);
|
// ===== Second file: unit tests for the TensorRT elementwise op converter =====
|
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
|
||||||
|
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace inference {
|
||||||
|
namespace tensorrt {
|
||||||
|
|
||||||
|
// Validates the weight path: Y is declared as a parameter (persistable
// weight), so conversion should go through ElementwiseWeightOpConverter.
TEST(elementwise_op, add_weight_test) {
  std::unordered_set<std::string> param_names({"elementwise_add-Y"});
  framework::Scope scope;
  // Max batch 10, 32KB workspace.
  TRTConvertValidation validator(10, param_names, scope, 1 << 15);

  validator.DeclInputVar("elementwise_add-X", nvinfer1::DimsCHW(10, 3, 3));
  validator.DeclParamVar("elementwise_add-Y", nvinfer1::Dims3(10, 1, 1));
  validator.DeclOutputVar("elementwise_add-Out", nvinfer1::DimsCHW(10, 3, 3));

  // Build the fluid op description.
  framework::OpDesc op_desc;
  op_desc.SetType("elementwise_add");
  op_desc.SetInput("X", {"elementwise_add-X"});
  op_desc.SetInput("Y", {"elementwise_add-Y"});
  op_desc.SetOutput("Out", {"elementwise_add-Out"});

  int broadcast_axis = 1;
  op_desc.SetAttr("axis", broadcast_axis);

  validator.SetOp(*op_desc.Proto());

  // Run the fluid op and the TensorRT engine with batch size 8 and compare.
  validator.Execute(8);
}
|
||||||
|
|
||||||
|
// Validates the tensor path: both X and Y are runtime tensors, so conversion
// should go through ElementwiseTensorAddOpConverter.
TEST(elementwise_op, add_tensor_test) {
  std::unordered_set<std::string> param_names;
  framework::Scope scope;
  // Max batch 8, 32KB workspace.
  TRTConvertValidation validator(8, param_names, scope, 1 << 15);

  validator.DeclInputVar("elementwise_add-X", nvinfer1::DimsCHW(10, 3, 3));
  validator.DeclInputVar("elementwise_add-Y", nvinfer1::Dims3(10, 3, 3));
  validator.DeclOutputVar("elementwise_add-Out", nvinfer1::DimsCHW(10, 3, 3));

  // Build the fluid op description; "axis" is left at its default of -1.
  framework::OpDesc op_desc;
  op_desc.SetType("elementwise_add");
  op_desc.SetInput("X", {"elementwise_add-X"});
  op_desc.SetInput("Y", {"elementwise_add-Y"});
  op_desc.SetOutput("Out", {"elementwise_add-Out"});

  validator.SetOp(*op_desc.Proto());

  // Run the fluid op and the TensorRT engine with batch size 8 and compare.
  validator.Execute(8);
}
|
||||||
|
|
||||||
|
} // namespace tensorrt
|
||||||
|
} // namespace inference
|
||||||
|
} // namespace paddle
|
||||||
|
// Link in the fluid elementwise_add operator so the tests above can execute
// it for reference results.
USE_OP(elementwise_add);
|