@@ -130,12 +130,13 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler {
 
   std::shared_ptr<mkldnn::memory> AcquireWeightsMemoryFromPrimitive(
       const std::shared_ptr<mkldnn::memory> user_weights_memory_p,
-      std::vector<mkldnn::primitive>& pipeline) {  // NOLINT
+      std::vector<mkldnn::primitive>& pipeline,  // NOLINT
+      bool is_persistent = false) {
     auto user_weights_pd = user_weights_memory_p->get_primitive_desc();
     auto weights_pd = conv_pd_->weights_primitive_desc();
     return this->AcquireMemory(weights_pd, user_weights_pd,
                                user_weights_memory_p, "@weights_mem_p",
-                               pipeline);
+                               pipeline, is_persistent);
   }
 
   std::shared_ptr<mkldnn::memory> AcquireBiasMemoryFromPrimitive(
@@ -266,6 +267,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                    "It must use CPUPlace.");
 
+    const bool is_test = ctx.Attr<bool>("is_test");
+
     auto& dev_ctx =
         ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();
@@ -296,6 +299,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
     std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
     std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
+    bool fuse_relu = ctx.Attr<bool>("fuse_relu");
     int groups = ctx.Attr<int>("groups");
 
     // TODO(pzelazko-intel) add support for group convolution and dilation
@@ -348,11 +352,12 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       bias_tz = paddle::framework::vectorize2int(bias->dims());
       auto bias_md = platform::MKLDNNMemDesc(
           bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x);
-      conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
-                                     strides, paddings, mkldnn_engine);
+      conv_pd =
+          ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md, strides,
+                               paddings, mkldnn_engine, fuse_relu);
     } else {
       conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides,
-                                     paddings, mkldnn_engine);
+                                     paddings, mkldnn_engine, fuse_relu);
     }
     // Save conv_pd/src_memory/weights_memory for backward pass
     dev_ctx.SetBlob(key_conv_pd, conv_pd);
@@ -371,7 +376,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto src_memory_p =
         handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline);
     auto weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive(
-        user_weights_memory_p, pipeline);
+        user_weights_memory_p, pipeline, is_test);
     auto dst_memory_p =
         handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
 
@@ -402,11 +407,26 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   }
 
  private:
+  mkldnn::primitive_attr AddRelu() const {
+    // Fusion with ReLU layer is executed through the PostOps feature. Create a
+    // PostOps object and configure it to execute an eltwise relu operation.
+    mkldnn::primitive_attr conv_attr;
+    constexpr float scale = 1.0f;
+    constexpr float negative_slope = 0.0f;
+    constexpr float placeholder = 0.0f;
+    mkldnn::post_ops post_operations;
+    post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
+                                   negative_slope, placeholder);
+    conv_attr.set_post_ops(post_operations);
+    return conv_attr;
+  }
+
   std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
   ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
                        const memory::desc& dst, const std::vector<int>& strides,
                        const std::vector<int>& paddings,
-                       const mkldnn::engine& engine) const {
+                       const mkldnn::engine& engine,
+                       const bool fuse_relu) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};
 
@@ -415,8 +435,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         dst, stride_dims, padding_dims, padding_dims,
         mkldnn::padding_kind::zero);
 
-    auto p_conv_pd =
-        new mkldnn::convolution_forward::primitive_desc(conv_desc, engine);
+    mkldnn::primitive_attr conv_attr;
+    if (fuse_relu) {
+      conv_attr = AddRelu();
+    }
+
+    auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
+        conv_desc, conv_attr, engine);
 
     return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
         p_conv_pd);
@@ -427,7 +452,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                        const memory::desc& bias, const memory::desc& dst,
                        const std::vector<int>& strides,
                        const std::vector<int>& paddings,
-                       const mkldnn::engine& engine) const {
+                       const mkldnn::engine& engine,
+                       const bool fuse_relu) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};
 
@@ -436,8 +462,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         bias, dst, stride_dims, padding_dims, padding_dims,
         mkldnn::padding_kind::zero);
 
-    auto p_conv_pd =
-        new mkldnn::convolution_forward::primitive_desc(conv_desc, engine);
+    mkldnn::primitive_attr conv_attr;
+    if (fuse_relu) {
+      conv_attr = AddRelu();
+    }
+
+    auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
+        conv_desc, conv_attr, engine);
 
     return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
         p_conv_pd);
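
For reference, a minimal standalone sketch of the post-ops pattern this patch introduces, assuming the Intel MKL-DNN 0.x C++ API that this file targets. MakeReluAttr below is a hypothetical free-function rendition of the new AddRelu() member, not code from the patch itself.

#include <mkldnn.hpp>

// Hypothetical helper mirroring AddRelu(): build a primitive_attr whose
// post-ops append an eltwise ReLU applied to the convolution output.
mkldnn::primitive_attr MakeReluAttr() {
  constexpr float scale = 1.0f;           // no additional output scaling
  constexpr float negative_slope = 0.0f;  // plain ReLU, no leaky slope
  constexpr float placeholder = 0.0f;     // unused beta parameter
  mkldnn::post_ops post_operations;
  post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
                                 negative_slope, placeholder);
  mkldnn::primitive_attr conv_attr;
  conv_attr.set_post_ops(post_operations);
  return conv_attr;
}

// Usage: pass the attribute when constructing the fused primitive descriptor,
// as ConvFwdPrimitiveDesc() now does when fuse_relu is true, e.g.
//   mkldnn::convolution_forward::primitive_desc pd(conv_desc, MakeReluAttr(),
//                                                  engine);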