Cherry-pick from #16662: Anakin subgraph CPU support

Branch: feature/anakin-engine3
Author: nhzlx, 6 years ago
Parent: 8643dbc233
Commit: 7ad182e16c

@@ -25,8 +25,9 @@ endif()
 if(ANAKIN_FOUND)
   message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
+  include_directories(${ANAKIN_ROOT})
   include_directories(${ANAKIN_ROOT}/include)
-  include_directories(${ANAKIN_ROOT}/include/saber)
+  include_directories(${ANAKIN_ROOT}/saber)
   link_directories(${ANAKIN_ROOT})
   add_definitions(-DPADDLE_WITH_ANAKIN)
 endif()

@@ -16,16 +16,13 @@
 #include <algorithm>
 #include <map>
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
+template <typename TargetT>
+ActivationOpConverter<TargetT>::ActivationOpConverter(
+    const std::string &op_type)
     : op_type_(op_type) {
   auto it = anakin_op_types_.find(op_type_);
   PADDLE_ENFORCE(it != anakin_op_types_.end(),
@@ -33,10 +30,10 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
   anakin_op_type_ = it->second;
 }
 
-void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
-                                       const framework::BlockDesc &block_desc,
-                                       const framework::Scope &scope,
-                                       bool test_mode) {
+template <typename TargetT>
+void ActivationOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -44,13 +41,20 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "type", anakin_op_type_);
+  this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid,
+                                  SigmoidOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid,
+                                 SigmoidOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
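The same mechanical change repeats in every converter below, so here is a standalone sketch of the pattern: the converter hierarchy is templated on the saber target, and each concrete converter is registered once per backend, with the CUDA registration guarded by a build flag. All names in this sketch (the X86/NV stand-ins, OpConverterBase, RegisterConverter, WITH_CUDA) are illustrative substitutes, not the Paddle or Anakin identifiers:

// sketch.cc -- compile with: g++ -std=c++11 sketch.cc
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct X86 {};  // stand-in for ::anakin::saber::X86
struct NV {};   // stand-in for ::anakin::saber::NV

// Stand-in for AnakinOpConverter<TargetT>: the base is templated on the
// target so one converter source serves both backends.
template <typename TargetT>
struct OpConverterBase {
  virtual ~OpConverterBase() {}
  virtual void Convert() = 0;
};

template <typename TargetT>
struct SigmoidConverter : OpConverterBase<TargetT> {
  void Convert() override { std::cout << "convert sigmoid\n"; }
};

// One factory registry per target type, keyed by the Paddle op type.
template <typename TargetT>
using Factory = std::function<std::unique_ptr<OpConverterBase<TargetT>>()>;

template <typename TargetT>
std::map<std::string, Factory<TargetT>> &Registry() {
  static std::map<std::string, Factory<TargetT>> registry;
  return registry;
}

// Stand-in for what the REGISTER_CPU/CUDA_ANAKIN_OP_CONVERTER macros expand
// to: a static registration into the registry of the matching target.
template <typename TargetT, typename ConverterT>
bool RegisterConverter(const std::string &op_type) {
  Registry<TargetT>()[op_type] = [] {
    return std::unique_ptr<OpConverterBase<TargetT>>(new ConverterT);
  };
  return true;
}

static bool cpu_sigmoid =
    RegisterConverter<X86, SigmoidConverter<X86>>("sigmoid");
#ifdef WITH_CUDA  // mirrors the PADDLE_WITH_CUDA guard in the diff
static bool cuda_sigmoid =
    RegisterConverter<NV, SigmoidConverter<NV>>("sigmoid");
#endif

int main() {
  Registry<X86>()["sigmoid"]()->Convert();  // prints "convert sigmoid"
  return 0;
}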

@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class ActivationOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ActivationOpConverter : public AnakinOpConverter<TargetT> {
  public:
   explicit ActivationOpConverter(const std::string &op_type);
@@ -39,14 +40,16 @@ class ActivationOpConverter : public AnakinOpConverter {
       {"sigmoid", "Sigmoid"}};
 };
 
-class TanhOpConverter : public ActivationOpConverter {
+template <typename TargetT>
+class TanhOpConverter : public ActivationOpConverter<TargetT> {
  public:
-  TanhOpConverter() : ActivationOpConverter("tanh") {}
+  TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
 };
 
-class SigmoidOpConverter : public ActivationOpConverter {
+template <typename TargetT>
+class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
  public:
-  SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
+  SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
 };
 
 }  // namespace anakin
 }  // namespace inference

@@ -18,19 +18,16 @@
 #include <vector>
 
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void AffineChannelOpConverter::operator()(
+template <typename TargetT>
+void AffineChannelOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -59,7 +56,7 @@ void AffineChannelOpConverter::operator()(
   bias_tensor->Resize(bias_t->dims());
   TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
 
-  engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
 
   // Generate the Scale parameter of Anakin.
   auto scale_shape = framework::vectorize2int(scale_t->dims());
@@ -67,7 +64,8 @@ void AffineChannelOpConverter::operator()(
     scale_shape.insert(scale_shape.begin(), 1);
   }
   Shape anakin_scale_shape(scale_shape);
-  auto *weight1 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_scale_shape);
+  auto *weight1 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_scale_shape);
   float *scale_cpu_data =
       static_cast<float *>(weight1->h_tensor().mutable_data());
@@ -75,7 +73,7 @@ void AffineChannelOpConverter::operator()(
               scale_cpu_data);
   weight1->d_tensor().set_shape(anakin_scale_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   // Generate the Bias parameter of Anakin.
   auto bias_shape = framework::vectorize2int(bias_t->dims());
@@ -83,18 +81,24 @@ void AffineChannelOpConverter::operator()(
     bias_shape.insert(bias_shape.begin(), 1);
   }
   Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
+  auto *weight2 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_bias_shape);
   float *bias_cpu_data =
       static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
   weight2->d_tensor().set_shape(anakin_bias_shape);
   weight2->d_tensor().copy_from(weight2->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);

@@ -21,7 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class AffineChannelOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
  public:
   AffineChannelOpConverter() = default;

@@ -21,17 +21,16 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void BatchNormOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
   std::map<std::string, std::string> inputs;
@@ -48,9 +47,9 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
-  engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
-  engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
-  engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
+  this->engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
+  this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
+  this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
 
   auto scale_op_name = op_name + ":scale";
   auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,
@@ -81,48 +80,54 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
   Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
   Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
   auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
-  engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
 
   auto *weight2 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape2);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape2);
   auto *variance_data =
       static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
-  engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
 
   Shape shape3(std::vector<int>({1, 1, 1, 1}));
   auto *weight3 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape3);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape3);
   auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
   float weight3_data[] = {1};
   std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
-  engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
+  this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
 
   Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
-  auto *scale =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(scale_shape);
+  auto *scale = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+      scale_shape);
   auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
   std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
 
   Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
-  auto *bias =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(bias_shape);
+  auto *bias = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+      bias_shape);
   auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
   std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
 
-  engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
-  engine_->AddOpAttr(scale_op_name, "axis", 1);
-  engine_->AddOpAttr(scale_op_name, "num_axes", 1);
-  engine_->AddOpAttr(scale_op_name, "bias_term", true);
-  engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
-  engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
+  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
+  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
+  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
+  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
+  this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
+  this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm,
+                                  BatchNormOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm,
+                                 BatchNormOpConverter<::anakin::saber::X86>);
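For reference, the converter keeps relying on the standard inference-time decomposition of batch_norm: an Anakin BatchNorm stage that normalizes with the running mean and variance (weight_1, weight_2, epsilon, with weight_3 fixed to {1}), followed by an Anakin Scale stage that applies gamma and beta. A minimal numeric sketch in plain C++, not Anakin code:

// bn_sketch.cc -- compile with: g++ -std=c++11 bn_sketch.cc
#include <cmath>
#include <cstdio>

int main() {
  float x = 2.0f;
  float mean = 1.0f, variance = 4.0f, epsilon = 1e-5f;  // BatchNorm stage
  float gamma = 0.5f, beta = 0.1f;                      // Scale stage

  // Stage 1: the emitted "BatchNorm" op, weight_1 = mean, weight_2 = variance.
  float normalized = (x - mean) / std::sqrt(variance + epsilon);

  // Stage 2: the emitted "Scale" op, weight_1 = gamma (scale_t),
  // weight_2 = beta (bias_t), bias_term = true.
  float y = gamma * normalized + beta;

  // y == gamma * (x - mean) / sqrt(variance + epsilon) + beta
  std::printf("%f\n", y);
  return 0;
}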

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class BatchNormOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
  public:
   BatchNormOpConverter() = default;

@@ -15,38 +15,32 @@
 #include "paddle/fluid/inference/anakin/convert/concat.h"
 #include <algorithm>
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
-
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void ConcatOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   int axis = boost::get<int>(op_desc.GetAttr("axis"));
   auto input_names = op_desc.Input("X");
-  // PADDLE_ENFORCE(axis > 0,
-  //                "The axis attr of Concat op should be large than 0 for trt");
 
   auto y_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Concat", input_names, {y_name});
-  engine_->AddOpAttr(op_name, "axis", axis);
+  this->engine_->AddOp(op_name, "Concat", input_names, {y_name});
+  this->engine_->AddOpAttr(op_name, "axis", axis);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat,
+                                  ConcatOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(concat,
+                                 ConcatOpConverter<::anakin::saber::X86>);

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class ConcatOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ConcatOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ConcatOpConverter() = default;

@@ -18,19 +18,18 @@
 #include <vector>
 
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void Conv2dOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
@@ -39,7 +38,7 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
-  engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
 
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
@@ -51,38 +50,44 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
 
-  // const int n_output = weight_tensor->dims()[0];
-  // const int n_input = weight_tensor->dims()[1];
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
-  // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
-  engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
+  this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
+                                                 {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
+                                                 dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
-  engine_->AddOpAttr(op_name, "group", groups);
-  engine_->AddOpAttr(op_name, "axis", 1);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOpAttr(op_name, "group", groups);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOpAttr(op_name, "bias_term", false);
 
   auto weight_shape = framework::vectorize2int(filter_t->dims());
   Shape anakin_shape(weight_shape);
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d,
+                                 Conv2dOpConverter<::anakin::saber::X86>);
+
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d,
+                                  Conv2dOpConverter<::anakin::saber::NV>);
+#endif
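The engine_ -> this->engine_ and AddOpAttr -> template AddOpAttr edits throughout this commit are forced by C++ name lookup, not style: once AnakinOpConverter<TargetT> is a dependent base class, members inherited from it are invisible to unqualified lookup, and an explicit template argument list after -> needs the template keyword. A toy illustration (Base, Derived, AddOpAttr, engine_ here are not the Paddle classes):

// dependent_names.cc -- compile with: g++ -std=c++11 dependent_names.cc
#include <iostream>

template <typename T>
struct Base {
  int engine_ = 0;
  template <typename U>
  void AddOpAttr(U value) { std::cout << value << "\n"; }
};

template <typename T>
struct Derived : Base<T> {
  void Run() {
    // engine_ = 1;              // error: not found, base is dependent
    this->engine_ = 1;           // dependent member needs this->
    // this->AddOpAttr<int>(2);  // error: '<' parses as less-than
    this->template AddOpAttr<int>(2);  // explicit 'template' required
  }
};

int main() {
  Derived<float>{}.Run();  // prints 2
  return 0;
}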

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class Conv2dOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Conv2dOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Conv2dOpConverter() = default;

@@ -18,19 +18,18 @@
 #include <vector>
 
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT>
+void Conv2dFusionOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
@@ -40,7 +39,7 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
-  engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
 
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
@@ -63,28 +62,31 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   const int filter_w = weight_tensor->dims()[3];
   // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
-  engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
+  this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
+                                                 {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
+                                                 dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
-  engine_->AddOpAttr(op_name, "group", groups);
-  engine_->AddOpAttr(op_name, "axis", 1);
-  engine_->AddOpAttr(op_name, "bias_term", true);
+  this->engine_->AddOpAttr(op_name, "group", groups);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOpAttr(op_name, "bias_term", true);
 
   auto weight_shape = framework::vectorize2int(filter_t->dims());
   Shape anakin_shape(weight_shape);
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   auto bias_shape = framework::vectorize2int(b_t->dims());
   framework::LoDTensor bias_tensor;
@@ -98,17 +100,24 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   // bias_shape.push_back(1);
   Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
+  auto *weight2 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_bias_shape);
   float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
   weight2->d_tensor().set_shape(anakin_bias_shape);
   weight2->d_tensor().copy_from(weight2->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion,
+                                  Conv2dFusionOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion,
+                                 Conv2dFusionOpConverter<::anakin::saber::X86>);

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class Conv2dFusionOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Conv2dFusionOpConverter() = default;

@@ -17,17 +17,14 @@
 #include <map>
 #include <vector>
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void DensityPriorBoxOpConverter::operator()(
+template <typename TargetT>
+void DensityPriorBoxOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc& op, const framework::BlockDesc& block_desc,
     const framework::Scope& scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -81,27 +78,44 @@ void DensityPriorBoxOpConverter::operator()(
   std::vector<float> temp_v = {};
 
-  engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
-  engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", min_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", max_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", aspect_ratios);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens);
-  engine_->AddOpAttr(op_name, "is_flip", is_flip);
-  engine_->AddOpAttr(op_name, "is_clip", is_clip);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
-  engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "step_h", step_h);
-  engine_->AddOpAttr(op_name, "step_w", step_w);
-  engine_->AddOpAttr(op_name, "offset", offset);
-  engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", t_order);
+  this->engine_->AddOp(op_name, "PriorBox", {input_name, image_name},
+                       {output_name});
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "min_size",
+                                                   min_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "max_size",
+                                                   max_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "aspect_ratio",
+                                                   aspect_ratios);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_size",
+                                                   fixed_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_ratio",
+                                                   fixed_ratios);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "density", dens);
+  this->engine_->AddOpAttr(op_name, "is_flip", is_flip);
+  this->engine_->AddOpAttr(op_name, "is_clip", is_clip);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "variance",
+                                                   variances);
+  this->engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "step_h", step_h);
+  this->engine_->AddOpAttr(op_name, "step_w", step_w);
+  this->engine_->AddOpAttr(op_name, "offset", offset);
+  this->engine_->template AddOpAttr<PTuple<std::string>>(op_name, "order",
+                                                         t_order);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);

@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class DensityPriorBoxOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DensityPriorBoxOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DensityPriorBoxOpConverter() = default;

@@ -16,19 +16,14 @@
 #include <algorithm>
 #include <map>
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT>
+void DetectionOutOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto target_name = op_desc.Input("TargetBox").front();
   auto prior_box_name = op_desc.Input("PriorBox").front();
@@ -52,22 +47,28 @@ void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
         "Not support encode_center_size code_type in DetectionOut of anakin");
   }
 
-  engine_->AddOp(op_name, "DetectionOutput",
-                 {target_name, scores_name, prior_box_name}, {output_name});
-  engine_->AddOpAttr(op_name, "share_location", true);
-  engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
-  engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "background_id", background_label);
-  engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
-  engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
-  engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
-  engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
-  engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
-  engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
+  this->engine_->AddOp(op_name, "DetectionOutput",
+                       {target_name, scores_name, prior_box_name},
+                       {output_name});
+  this->engine_->AddOpAttr(op_name, "share_location", true);
+  this->engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
+  this->engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "background_id", background_label);
+  this->engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
+  this->engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
+  this->engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
+  this->engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
+  this->engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
+  this->engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out,
+                                  DetectionOutOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out,
+                                 DetectionOutOpConverter<::anakin::saber::X86>);

@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class DetectionOutOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DetectionOutOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DetectionOutOpConverter() = default;

@@ -19,21 +19,16 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void DropoutOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Mask").size(), 1);
@@ -43,25 +38,30 @@ void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Scale", {x_name}, {out_name});
+  this->engine_->AddOp(op_name, "Scale", {x_name}, {out_name});
 
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
   Shape shape1(std::vector<int>({1, 1, 1, 1}));
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
   auto *factor_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   float weight1_data[] = {factor};
   std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data);
 
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
-  engine_->AddOpAttr(op_name, "axis", 0);
-  engine_->AddOpAttr(op_name, "num_axes", 0);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "axis", 0);
+  this->engine_->AddOpAttr(op_name, "num_axes", 0);
+  this->engine_->AddOpAttr(op_name, "bias_term", false);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout,
+                                  DropoutOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout,
+                                 DropoutOpConverter<::anakin::saber::X86>);
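At inference time dropout reduces to a constant rescale, which is why the converter emits an Anakin Scale op whose single weight is factor = 1 - dropout_prob (axis = 0, num_axes = 0, no bias). A plain C++ sketch of the computation the emitted op performs, not Anakin code:

// dropout_sketch.cc -- compile with: g++ -std=c++11 dropout_sketch.cc
#include <cstdio>

int main() {
  float dropout_prob = 0.3f;
  float factor = 1.0f - dropout_prob;  // the single value stored in weight_1
  float x[3] = {1.0f, 2.0f, 3.0f};
  for (float &v : x) v *= factor;      // one scalar applied to every element
  std::printf("%f %f %f\n", x[0], x[1], x[2]);  // 0.7 1.4 2.1
  return 0;
}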

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class DropoutOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DropoutOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DropoutOpConverter() = default;

@@ -19,18 +19,15 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void ElementwiseAddOpConverter::operator()(
+template <typename TargetT>
+void ElementwiseAddOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -43,14 +40,16 @@ void ElementwiseAddOpConverter::operator()(
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
+  this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
   std::string elementwise_type = "Add";
-  engine_->AddOpAttr<std::string>(op_name, "type", elementwise_type);
+  this->engine_->template AddOpAttr<std::string>(op_name, "type",
+                                                 elementwise_type);
   std::vector<float> coeff = {1.0, 1.0};
-  engine_->AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }
 
-void ElementwiseMulOpConverter::operator()(
+template <typename TargetT>
+void ElementwiseMulOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -63,26 +62,25 @@ void ElementwiseMulOpConverter::operator()(
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Scale", {x_name, y_name}, {out_name});
-  // Fill a number to weight_1 as a placeholder.
-  Shape shape1(std::vector<int>({1, 1, 1, 1}));
-  auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *placeholder_data =
-      static_cast<float *>(weight1->h_tensor().mutable_data());
-  float weight1_data[] = {1};
-  std::copy(std::begin(weight1_data), std::end(weight1_data), placeholder_data);
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  auto axis = boost::get<int>(op_desc.GetAttr("axis"));
-  engine_->AddOpAttr(op_name, "axis", axis);
-  engine_->AddOpAttr(op_name, "num_axes", 1);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
+  std::string elementwise_type = "Prod";
+  this->engine_->template AddOpAttr<std::string>(op_name, "type",
+                                                 elementwise_type);
+  std::vector<float> coeff = {1.0, 1.0};
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::X86>);
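With this change elementwise_mul goes through the same Anakin Eltwise op as elementwise_add, with type "Prod" instead of "Add"; the earlier Scale-based placeholder (a dummy weight_1 of {1} plus per-axis attributes) is dropped. A plain C++ sketch of the intended semantics; treating coeff as per-input weights for Add only, and ignoring it for Prod, is an assumption here, not something the diff confirms:

// eltwise_sketch.cc -- compile with: g++ -std=c++11 eltwise_sketch.cc
#include <cstdio>
#include <string>
#include <vector>

float Eltwise(const std::string &type, float x, float y,
              const std::vector<float> &coeff) {
  if (type == "Add") return coeff[0] * x + coeff[1] * y;  // weighted sum
  return x * y;  // "Prod": elementwise product (coeff assumed unused)
}

int main() {
  std::vector<float> coeff = {1.0f, 1.0f};
  std::printf("%f\n", Eltwise("Add", 2.0f, 3.0f, coeff));   // 5.0
  std::printf("%f\n", Eltwise("Prod", 2.0f, 3.0f, coeff));  // 6.0
  return 0;
}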

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class ElementwiseAddOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ElementwiseAddOpConverter() = default;
@@ -33,7 +34,8 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
  private:
 };
 
-class ElementwiseMulOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ElementwiseMulOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ElementwiseMulOpConverter() = default;

@@ -19,17 +19,16 @@
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void FcBaseOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_names = op_desc.InputNames();
   bool with_bias = input_names.size() == 3;
@@ -51,13 +50,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input(i_name).front();
   auto output_name = op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "bias_term", with_bias);
-  engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
 
   auto weight_shape = framework::vectorize2int(y_t->dims());
   int out_dim = weight_shape[1];
-  engine_->AddOpAttr(op_name, "out_dim", out_dim);
+  this->engine_->AddOpAttr(op_name, "out_dim", out_dim);
   const int w_m = weight_shape[0];
   const int w_k = weight_shape[1];
@@ -79,12 +78,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
     }
   }
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   // get bias
   if (with_bias) {
@@ -104,13 +104,14 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
     // bias_shape.push_back(1);
     Shape anakin_bias_shape(bias_shape);
-    auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-        anakin_bias_shape);
+    auto *weight2 =
+        GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+            anakin_bias_shape);
     float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
     std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
     weight2->d_tensor().set_shape(anakin_bias_shape);
     weight2->d_tensor().copy_from(weight2->h_tensor());
-    engine_->AddOpAttr(op_name, "weight_2", *weight2);
+    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
 
@@ -118,5 +119,10 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::X86>);
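The Dense conversion uploads the weight in transposed layout: the Paddle weight is [w_m, w_k] row-major and trans_weight_data is filled from it before the block copy. The transpose loop itself sits outside this excerpt, so the layout below is an assumption for illustration, in plain C++:

// fc_weight_sketch.cc -- compile with: g++ -std=c++11 fc_weight_sketch.cc
#include <cstdio>
#include <vector>

int main() {
  const int w_m = 2, w_k = 3;                 // Paddle weight dims: [w_m, w_k]
  std::vector<float> w = {1, 2, 3, 4, 5, 6};  // row-major source weight
  std::vector<float> trans_weight_data(w_m * w_k);
  for (int i = 0; i < w_m; i++)
    for (int j = 0; j < w_k; j++)
      trans_weight_data[j * w_m + i] = w[i * w_k + j];  // [w_k, w_m] layout
  for (float v : trans_weight_data) std::printf("%g ", v);  // 1 4 2 5 3 6
  std::printf("\n");
  return 0;
}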

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class FcBaseOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
  public:
   FcBaseOpConverter() = default;
@@ -32,13 +33,15 @@ class FcBaseOpConverter : public AnakinOpConverter {
 };
 
 // with bias
-class FcOpConverter : public FcBaseOpConverter {
+template <typename TargetT>
+class FcOpConverter : public FcBaseOpConverter<TargetT> {
  public:
   FcOpConverter() = default;
 };
 
 // without bias
-class MulOpConverter : public FcBaseOpConverter {
+template <typename TargetT>
+class MulOpConverter : public FcBaseOpConverter<TargetT> {
  public:
   MulOpConverter() = default;
 };

@@ -15,20 +15,16 @@
 #include "paddle/fluid/inference/anakin/convert/flatten.h"
 #include <vector>
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void FlattenOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
@@ -41,12 +37,17 @@ void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
   std::vector<int> out_dims = {0, -1, 1, 1};
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Reshape", {input}, {output});
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
+  this->engine_->AddOp(op_name, "Reshape", {input}, {output});
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten,
+                                  FlattenOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten,
+                                 FlattenOpConverter<::anakin::saber::X86>);

@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class FlattenOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class FlattenOpConverter : public AnakinOpConverter<TargetT> {
  public:
   FlattenOpConverter() = default;

Some files were not shown because too many files have changed in this diff.
