cherry-pick from feature/anakin-engine: Add subgraph fuse support and anakin engine #16018

move-code
nhzlx 6 years ago
parent 084310f536
commit b21770a2aa

@ -1,8 +1,12 @@
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc activation.cc pool2d.cc concat.cc split.cc DEPS anakin_engine framework_proto scope op_registry)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter mul_op)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} activation_op anakin_op_converter
SERIAL)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter pool_op pooling)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter concat_op concat_and_split)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} anakin_op_converter split_op concat_and_split)
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc DEPS anakin_engine framework_proto scope op_registry)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS
anakin_op_converter elementwise_add_op)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL)

@ -45,15 +45,11 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
auto output_name = op_desc.Output("Out").front();
engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
engine_->AddOpAttr(op_name, "type", anakin_op_type_);
if (op_type_ == "relu") {
engine_->AddOpAttr(op_name, "alpha", 0);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);

@ -34,13 +34,8 @@ class ActivationOpConverter : public AnakinOpConverter {
private:
std::string op_type_;
std::string anakin_op_type_;
std::map<std::string, std::string> anakin_ops_type_{
{"relu", "Relu"}, {"tanh", "TanH"}, {"sigmoid", "Sigmoid"}};
};
class ReluOpConverter : public ActivationOpConverter {
public:
ReluOpConverter() : ActivationOpConverter("relu") {}
std::map<std::string, std::string> anakin_ops_type_{{"tanh", "TanH"},
{"sigmoid", "Sigmoid"}};
};
class TanhOpConverter : public ActivationOpConverter {
@ -50,7 +45,7 @@ class TanhOpConverter : public ActivationOpConverter {
class SigmoidOpConverter : public ActivationOpConverter {
public:
SigmoidOpConverter() : ActivationOpConverter("tanh") {}
SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
};
} // namespace anakin
} // namespace inference

@ -32,8 +32,8 @@ void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
auto input_names = op_desc.Input("X");
int axis = boost::get<int>(op_desc.GetAttr("axis"));
auto input_names = op_desc.Input("X");
PADDLE_ENFORCE(axis > 0,
"The axis attr of Concat op should be large than 0 for trt");

@ -51,10 +51,11 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
// const int n_output = weight_tensor->dims()[0];
const int n_input = weight_tensor->dims()[1];
// const int n_input = weight_tensor->dims()[1];
const int filter_h = weight_tensor->dims()[2];
const int filter_w = weight_tensor->dims()[3];
auto filter_num = n_input * filter_h * filter_w;
// auto filter_num = n_input * filter_h * filter_w ;
auto filter_num = weight_tensor->dims()[0];
engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));

@ -0,0 +1,113 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
#include <algorithm>
#include <memory>
#include <vector>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::saber::NV;
using anakin::saber::Shape;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
// Translates a fluid `conv2d_fusion` op description into an Anakin
// "Convolution" node registered on engine_.
//
// op:        serialized description of the op being converted.
// scope:     scope searched for the "Filter" and "Bias" variables.
// test_mode: not referenced by this converter.
//
// The filter and bias tensors are copied to CPUPlace and from there into newly
// allocated Anakin blocks, which are attached to the node as "weight_1" and
// "weight_2".
void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
                                         const framework::Scope &scope,
                                         bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);

  // Every slot used below must hold exactly one variable name.
  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

  auto src_name = op_desc.Input("Input").front();
  auto dst_name = op_desc.Output("Output").front();
  // The node is named "<op type>:<output variable>".
  auto op_name = op_desc.Type() + ":" + dst_name;
  engine_->AddOp(op_name, "Convolution", {src_name}, {dst_name});

  // Resolve the filter and bias tensors from the scope.
  auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
  PADDLE_ENFORCE_NOT_NULL(filter_v);
  auto *filter_src = filter_v->GetMutable<framework::LoDTensor>();

  auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
  PADDLE_ENFORCE_NOT_NULL(b_v);
  auto *bias_src = b_v->GetMutable<framework::LoDTensor>();

  // CPUPlace copy of the filter; it must be 4-dimensional.
  std::unique_ptr<framework::LoDTensor> weight_tensor(
      new framework::LoDTensor());
  weight_tensor->Resize(filter_src->dims());
  TensorCopySync(*filter_src, platform::CPUPlace(), weight_tensor.get());
  PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);

  // filter_num is dims()[0]; the kernel size is (dims()[2], dims()[3]).
  const int filter_num = weight_tensor->dims()[0];
  const int filter_h = weight_tensor->dims()[2];
  const int filter_w = weight_tensor->dims()[3];
  engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});

  // Forward the convolution attributes under the attribute names used here
  // for the Anakin node.
  auto stride_attr = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", stride_attr);
  auto padding_attr = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", padding_attr);
  auto dilation_attr =
      boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilation_attr);
  const int group_attr = boost::get<int>(op_desc.GetAttr("groups"));
  engine_->AddOpAttr(op_name, "group", group_attr);
  engine_->AddOpAttr(op_name, "axis", 1);
  engine_->AddOpAttr(op_name, "bias_term", true);

  // weight_1: the filter, stored with its original shape.
  auto filter_dims = framework::vectorize2int(filter_src->dims());
  Shape anakin_filter_shape(filter_dims);
  auto *filter_block =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
          anakin_filter_shape);
  float *filter_dst =
      static_cast<float *>(filter_block->h_tensor().mutable_data());
  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), filter_dst);
  // Mirror h_tensor into d_tensor (presumably host -> device; confirm against
  // the Anakin PBlock API).
  filter_block->d_tensor().set_shape(anakin_filter_shape);
  filter_block->d_tensor().copy_from(filter_block->h_tensor());
  engine_->AddOpAttr(op_name, "weight_1", *filter_block);

  // weight_2: the bias, with three leading 1s prepended to its shape.
  auto bias_dims = framework::vectorize2int(bias_src->dims());
  framework::LoDTensor host_bias;
  host_bias.Resize(bias_src->dims());
  TensorCopySync(*bias_src, platform::CPUPlace(), &host_bias);
  auto *bias_values = host_bias.data<float>();
  bias_dims.insert(bias_dims.begin(), 3, 1);
  Shape anakin_bias_shape(bias_dims);
  auto *bias_block = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
      anakin_bias_shape);
  float *bias_dst = static_cast<float *>(bias_block->h_tensor().mutable_data());
  std::copy_n(bias_values, host_bias.numel(), bias_dst);
  bias_block->d_tensor().set_shape(anakin_bias_shape);
  bias_block->d_tensor().copy_from(bias_block->h_tensor());
  engine_->AddOpAttr(op_name, "weight_2", *bias_block);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Associates the op type `conv2d_fusion` with Conv2dFusionOpConverter
// (presumably so it can be found via the AnakinOpConverter registry lookup by
// op type -- confirm against the macro definition).
REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);

@ -0,0 +1,35 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
// Anakin op converter for the fluid `conv2d_fusion` op. Only the conversion
// call operator is overridden; its definition lives in the matching .cc file.
class Conv2dFusionOpConverter : public AnakinOpConverter {
 public:
  Conv2dFusionOpConverter() = default;
  virtual ~Conv2dFusionOpConverter() {}

  // Converts the op described by `op`, resolving variables through `scope`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::Scope &scope, bool test_mode) override;
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -0,0 +1,57 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
// Translates a fluid `elementwise_add` op description into an Anakin "Eltwise"
// node of type "Add" with coefficients {1.0, 1.0}.
//
// op:        serialized description of the op being converted.
// scope:     not referenced by this converter.
// test_mode: not referenced by this converter.
void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
                                           const framework::Scope &scope,
                                           bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);

  // X, Y and Out must each name exactly one variable.
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto lhs_name = op_desc.Input("X").front();
  auto rhs_name = op_desc.Input("Y").front();
  auto sum_name = op_desc.Output("Out").front();
  // The node is named "<op type>:<output variable>".
  auto op_name = op_desc.Type() + ":" + sum_name;

  // Both X and Y are wired in as graph inputs of the Eltwise node; Y is not
  // loaded from the scope here.
  engine_->AddOp(op_name, "Eltwise", {lhs_name, rhs_name}, {sum_name});

  std::string eltwise_kind = "Add";
  engine_->AddOpAttr<std::string>(op_name, "type", eltwise_kind);

  std::vector<float> unit_coeffs = {1.0, 1.0};
  engine_->AddOpAttr<PTuple<float>>(op_name, "coeff", unit_coeffs);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Associates the op type `elementwise_add` with ElementwiseAddOpConverter
// (presumably so it can be found via the AnakinOpConverter registry lookup by
// op type -- confirm against the macro definition).
REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);

@ -0,0 +1,37 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
// Anakin op converter for the fluid `elementwise_add` op. Only the conversion
// call operator is overridden; its definition lives in the matching .cc file.
class ElementwiseAddOpConverter : public AnakinOpConverter {
 public:
  ElementwiseAddOpConverter() = default;
  virtual ~ElementwiseAddOpConverter() {}

  // Converts the op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::Scope &scope, bool test_mode) override;
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -14,6 +14,8 @@
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
@ -24,28 +26,39 @@ namespace paddle {
namespace inference {
namespace anakin {
void FcOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) {
void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto input_names = op_desc.InputNames();
bool with_bias = input_names.size() == 3;
std::string w_name = "Y";
std::string i_name = "X";
if (with_bias) {
w_name = "W";
i_name = "Input";
}
auto x_name = op_desc.Input("X").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto *y_v = scope.FindVar(op_desc.Input("Y").front());
// get weights
auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
PADDLE_ENFORCE_NOT_NULL(y_v);
auto *y_t = y_v->GetMutable<framework::LoDTensor>();
auto input_name = op_desc.Input("X").front();
auto input_name = op_desc.Input(i_name).front();
auto output_name = op_desc.Output("Out").front();
auto weight_shape = framework::vectorize2int(y_t->dims());
engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
engine_->AddOpAttr(op_name, "bias_term", false);
engine_->AddOpAttr(op_name, "bias_term", with_bias);
engine_->AddOpAttr(op_name, "axis", 1);
auto weight_shape = framework::vectorize2int(y_t->dims());
int out_dim = weight_shape[1];
engine_->AddOpAttr(op_name, "out_dim", out_dim);
const int w_m = weight_shape[0];
const int w_k = weight_shape[1];
weight_shape.push_back(1);
weight_shape.push_back(1);
@ -54,18 +67,54 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op,
framework::LoDTensor weight_tensor;
weight_tensor.Resize(y_t->dims());
TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
auto *weight_data = weight_tensor.data<float>();
PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel());
std::vector<float> trans_weight_data(weight_tensor.numel());
for (int i = 0; i < w_m; i++) {
for (int j = 0; j < w_k; j++) {
trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
}
}
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr(op_name, "weight_1", *weight1);
// get bias
if (with_bias) {
auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(b_v);
auto *b_t = b_v->GetMutable<framework::LoDTensor>();
auto bias_shape = framework::vectorize2int(b_t->dims());
framework::LoDTensor bias_tensor;
bias_tensor.Resize(b_t->dims());
TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
auto *bias_data = bias_tensor.data<float>();
bias_shape.insert(bias_shape.begin(), 1);
bias_shape.insert(bias_shape.begin(), 1);
bias_shape.insert(bias_shape.begin(), 1);
// bias_shape.push_back(1);
// bias_shape.push_back(1);
Shape anakin_bias_shape(bias_shape);
auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
anakin_bias_shape);
float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
weight2->d_tensor().set_shape(anakin_bias_shape);
weight2->d_tensor().copy_from(weight2->h_tensor());
engine_->AddOpAttr(op_name, "weight_2", *weight2);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);

@ -20,14 +20,26 @@ namespace paddle {
namespace inference {
namespace anakin {
class FcOpConverter : public AnakinOpConverter {
class FcBaseOpConverter : public AnakinOpConverter {
public:
FcOpConverter() = default;
FcBaseOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~FcOpConverter() {}
virtual ~FcBaseOpConverter() {}
};
// with bias
class FcOpConverter : public FcBaseOpConverter {
public:
FcOpConverter() = default;
};
// without bias
class MulOpConverter : public FcBaseOpConverter {
public:
MulOpConverter() = default;
};
} // namespace anakin

@ -47,14 +47,6 @@ class AnakinOpConverter {
std::string op_type = op_desc.Type();
AnakinOpConverter *it = nullptr;
if (op_type == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
it = Registry<AnakinOpConverter>::Global().Lookup("fc");
}
}
if (!it) {
it = Registry<AnakinOpConverter>::Global().Lookup(op_type);
}

@ -0,0 +1,47 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include <algorithm>
#include <map>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::saber::NV;
using anakin::saber::Shape;
namespace paddle {
namespace inference {
namespace anakin {
// Translates a fluid `relu` op description into an Anakin "ReLU" node whose
// "alpha" attribute is set to 0.
//
// op:        serialized description of the op being converted.
// scope:     not referenced by this converter.
// test_mode: not referenced by this converter.
void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
                                 const framework::Scope &scope,
                                 bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);

  // Exactly one input and one output variable are expected.
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto src_name = op_desc.Input("X").front();
  auto dst_name = op_desc.Output("Out").front();
  // The node is named "<op type>:<output variable>".
  auto op_name = op_desc.Type() + ":" + dst_name;

  engine_->AddOp(op_name, "ReLU", {src_name}, {dst_name});
  engine_->AddOpAttr(op_name, "alpha", 0);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Associates the op type `relu` with ReluOpConverter (presumably so it can be
// found via the AnakinOpConverter registry lookup by op type -- confirm
// against the macro definition).
REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);

@ -0,0 +1,37 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
// Anakin op converter for the fluid `relu` op. Only the conversion call
// operator is overridden; its definition lives in the matching .cc file.
class ReluOpConverter : public AnakinOpConverter {
 public:
  ReluOpConverter() = default;
  virtual ~ReluOpConverter() {}

  // Converts the op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::Scope &scope, bool test_mode) override;
};
} // namespace anakin
} // namespace inference
} // namespace paddle

@ -41,16 +41,13 @@ static void test_activation_op(const std::string &op_type) {
validator.Execute(5);
}
TEST(relu_op, test) { test_activation_op("relu"); }
TEST(sigm_op, test) { test_activation_op("sigmoid"); }
TEST(tanh_op, test) { test_activation_op("tanh"); }
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(relu);
USE_OP(sigmoid);
USE_OP(tanh);
USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER(tanh);

@ -25,10 +25,10 @@ TEST(concat_op, test) {
std::unordered_set<std::string> parameters({""});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("concat_x1", {1, 10, 3, 1});
validator.DeclInputVar("concat_x2", {1, 3, 3, 1});
validator.DeclInputVar("concat_x3", {1, 7, 3, 1});
validator.DeclOutputVar("concat_out", {1, 20, 3, 1});
validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
validator.DeclOutputVar("concat_out", {1, 6, 1, 1});
// Prepare Op description
framework::OpDesc desc;

@ -28,9 +28,9 @@ TEST(conv2d_op, test) {
std::unordered_set<std::string> parameters({"conv2d-Y"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("conv2d-X", {1, 2, 5, 5});
validator.DeclParamVar("conv2d-Y", {3, 2, 3, 3});
validator.DeclOutputVar("conv2d-Out", {1, 3, 5, 5});
validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});
// Prepare Op description
framework::OpDesc desc;
@ -40,7 +40,7 @@ TEST(conv2d_op, test) {
desc.SetOutput("Output", {"conv2d-Out"});
const std::vector<int> strides({1, 1});
const std::vector<int> paddings({1, 1});
const std::vector<int> paddings({0, 0});
const std::vector<int> dilations({1, 1});
const int groups = 1;

@ -0,0 +1,50 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
// Converts a single elementwise_add op over two 1x1x2x2 inputs and runs the
// validator on it with an argument of 1.
TEST(elementwise_op, native) {
  std::unordered_set<std::string> parameters;
  framework::Scope scope;
  AnakinConvertValidation validator(parameters, scope);

  // Both operands are declared as input variables; no parameters are used.
  validator.DeclInputVar("elementwise_add_x", {1, 1, 2, 2});
  validator.DeclInputVar("elementwise_y", {1, 1, 2, 2});
  validator.DeclOutputVar("elementwise_out", {1, 1, 2, 2});

  // Describe the op under test.
  framework::OpDesc add_desc;
  add_desc.SetType("elementwise_add");
  add_desc.SetInput("X", {"elementwise_add_x"});
  add_desc.SetInput("Y", {"elementwise_y"});
  add_desc.SetOutput("Out", {"elementwise_out"});
  int axis_attr = -1;
  add_desc.SetAttr("axis", axis_attr);

  validator.SetOp(*add_desc.Proto());
  validator.Execute(1);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Reference the fluid operator and the Anakin converter exercised by the test
// above (presumably to force their registrations to be linked into this test
// binary -- confirm against the macro definitions).
USE_OP(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_add);

@ -27,9 +27,9 @@ TEST(fc_op, test) {
std::unordered_set<std::string> parameters({"mul_y"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("mul_x", {1, 1, 1, 1});
validator.DeclParamVar("mul_y", {1, 2});
validator.DeclOutputVar("mul_out", {1, 1, 1, 2});
validator.DeclInputVar("mul_x", {1, 1, 2, 2});
validator.DeclParamVar("mul_y", {4, 2});
validator.DeclOutputVar("mul_out", {1, 2});
// Prepare Op description
framework::OpDesc desc;
@ -37,8 +37,8 @@ TEST(fc_op, test) {
desc.SetInput("X", {"mul_x"});
desc.SetInput("Y", {"mul_y"});
desc.SetOutput("Out", {"mul_out"});
int num_flatten_dims = 3;
desc.SetAttr("x_num_col_dims", num_flatten_dims);
// int num_flatten_dims = 3;
// desc.SetAttr("x_num_col_dims", num_flatten_dims);
validator.SetOp(*desc.Proto());
validator.Execute(10);

@ -0,0 +1,50 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
static void test_activation_op(const std::string &op_type) {
auto *converter = Registry<AnakinOpConverter>::Global().Lookup(op_type);
PADDLE_ENFORCE(converter != nullptr);
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc;
desc.SetType(op_type);
desc.SetInput("X", {"act-X"});
desc.SetOutput("Out", {"act-Out"});
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(5);
}
// Runs the shared conversion check against the "relu" op. The suite is named
// relu_op (not sigm_op) so that test listings and --gtest_filter patterns
// identify the operator actually under test.
TEST(relu_op, test) { test_activation_op("relu"); }
} // namespace anakin
} // namespace inference
} // namespace paddle
// Reference the fluid operator and the Anakin converter exercised by the test
// above (presumably to force their registrations to be linked into this test
// binary -- confirm against the macro definitions).
USE_OP(relu);
USE_ANAKIN_CONVERTER(relu);

@ -161,10 +161,6 @@ class AnakinConvertValidation {
framework::TensorToVector(*tensor, ctx, &fluid_out);
fluid_outputs.push_back(fluid_out);
// size_t fluid_out_size = fluid_out.size();
/*for (size_t i = 0; i < fluid_out_size; i++) {
std::cout << fluid_out[i] << std::endl;
}*/
outputs.insert({output, tensor});
}
@ -180,8 +176,7 @@ class AnakinConvertValidation {
size_t anakin_out_size = anakin_out.size();
auto fluid_out = fluid_outputs[i_output++];
for (size_t i = 0; i < anakin_out_size; i++) {
LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], "
<< "fluid[" << fluid_out[i] << "]";
EXPECT_LT(std::abs(fluid_out[i] - anakin_out[i]), 1e-3);
}
}
}

@ -68,29 +68,34 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
auto *tensor = input.second;
auto *data = tensor->data<float>();
auto shape = framework::vectorize2int(tensor->dims());
::anakin::saber::Shape anakin_shape(shape);
auto *anakin_input = net_->get_in(input.first);
auto anakin_input_shape = anakin_input->valid_shape();
PADDLE_ENFORCE(tensor->numel(), anakin_input_shape.count(),
"the fluid input size should be equal to anakin");
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
anakin_shape);
anakin_input->share_from(tmp_anakin_tensor);
anakin_input_shape);
anakin_input->copy_from(tmp_anakin_tensor);
}
for (const auto &output : outputs) {
auto *tensor = output.second;
auto *data = tensor->data<float>();
auto shape = framework::vectorize2int(tensor->dims());
::anakin::saber::Shape anakin_shape(shape);
auto *anakin_output = net_->get_out(output.first);
auto anakin_output_shape = anakin_output->valid_shape();
PADDLE_ENFORCE(tensor->numel(), anakin_output_shape.count(),
"the fluid output size should be equal to anakin");
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
anakin_shape);
anakin_output_shape);
anakin_output->share_from(tmp_anakin_tensor);
}
net_->prediction();
cudaDeviceSynchronize();
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph.");
PADDLE_ENFORCE(graph_->Freeze_v3(), "Freeze anakin subgraph.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>

@ -46,6 +46,9 @@ namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionType,
::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
class AnakinEngine {
using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
public:
explicit AnakinEngine(bool need_summary = false);
~AnakinEngine();
@ -61,16 +64,15 @@ class AnakinEngine {
PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
"Add operation's attribution.");
}
NetT *Net() { return net_.get(); }
std::unique_ptr<AnakinEngine> Clone();
void Freeze();
void Optimize();
void Save(std::string path) { graph_->save(path); }
void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs);
private:
using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
std::unique_ptr<GraphT> graph_;
std::unique_ptr<NetT> net_;
};

@ -20,7 +20,18 @@ namespace anakin {
// Just tell by the op_types.
struct SimpleOpTypeSetTeller : public Teller {
SimpleOpTypeSetTeller() {}
SimpleOpTypeSetTeller() {
// teller_set.insert("mul");
teller_set.insert("fc");
teller_set.insert("conv2d_fusion");
teller_set.insert("split");
teller_set.insert("relu");
teller_set.insert("pool2d");
teller_set.insert("elementwise_add");
teller_set.insert("concat");
teller_set.insert("tanh");
// teller_set.insert("conv2d");
}
bool operator()(const std::string& op_type,
const framework::OpDesc& desc) override {
@ -28,7 +39,7 @@ struct SimpleOpTypeSetTeller : public Teller {
}
private:
std::unordered_set<std::string> teller_set{{"mul"}};
std::unordered_set<std::string> teller_set;
};
bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) {

@ -13,8 +13,11 @@
// limitations under the License.
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
@ -63,7 +66,14 @@ void IRPassManager::CreatePasses(Argument *argument,
} else if (pass_name == "cpu_quantize_pass") {
pass->Set("quant_var_scales",
new VarQuantScale(argument->quant_var_scales()));
} else if (pass_name == "tensorrt_subgraph_pass") {
}
if (pass_name == "anakin_subgraph_pass") {
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
}
if (pass_name == "tensorrt_subgraph_pass") {
pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
pass->Set("min_subgraph_size",

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save